In [2]:
# Fully qualified BigQuery table with the public Stack Overflow comments.
TABLE = "bigquery-public-data.stackoverflow.comments"
# Cap on the number of rows loaded by the cells below (one million).
MAX_ROWS = 1_000_000

In [4]:
%%time

##############################
#       Pandas World         #
##############################
# Baseline: load the comments table into an in-memory pandas DataFrame.

import pandas as pd
# read_gbq is asked for at most MAX_ROWS rows of TABLE; only the id, text and
# score columns are kept in df.
df = pd.read_gbq(TABLE, max_results=MAX_ROWS)[['id', 'text', 'score']]
# Show the first ten rows as a sanity check.
print(df.head(10))

    id                                               text  score
0   10  It will help if you give some details of which...      6
1   25  infact it does. Look a the first lines of your...     10
2   27  "Currently + is implemented using StringBuffer...      7
3   41  I don't think that's the magic number he was r...     18
4   59  It's still very useful to know that magic numb...     12
5   96  This implementation is also nice if you wish t...      9
6  108  That's not full text searching, it's searching...      6
7  109  That's not full text searching, it's searching...      6
8  137  In vim you can open > 1 buffer. :e filename. T...      9
9  154  Sure, but what about a solution using O(1) mem...      8
CPU times: user 25.4 s, sys: 2.07 s, total: 27.4 s
Wall time: 1min 47s


In [5]:
# User defined function
# Based on https://www.codespeedy.com/find-nth-prime-number-in-python/
def nth_prime(n):
    """Return the n-th prime number, counting 2 as the first prime.

    Parameters
    ----------
    n : int
        1-based position of the wanted prime. Whole-number floats such as
        ``3.0`` are accepted and treated as the corresponding integer.

    Returns
    -------
    int
        The n-th prime, or ``-1`` when ``n`` is not greater than zero
        (this includes NaN, for which every comparison is false).

    Raises
    ------
    ValueError
        If ``n`` is positive but not a whole number (e.g. ``2.5``). Such a
        value can never equal a count of primes, so searching for it would
        never terminate.
    """
    # `not n > 0` (rather than `n <= 0`) also routes NaN to the -1 result.
    if not n > 0:
        return -1

    count = int(n)
    if count != n:
        raise ValueError(f"n must be a whole number, got {n!r}")

    primes = [2, 3]
    candidate = 3
    while len(primes) < count:
        # Every prime after 2 is odd, so only odd candidates are examined.
        candidate += 2
        is_prime = True
        for p in primes:
            # A composite number has a prime factor no larger than its square
            # root, so once p*p exceeds the candidate it must be prime.
            if p * p > candidate:
                break
            if candidate % p == 0:
                is_prime = False
                break
        if is_prime:
            primes.append(candidate)
    return primes[count - 1]

In [6]:
%%time

# Run the locally defined nth_prime on every value of the score column and
# store the results in an n_prime column of the frame returned by assign().
df = df.assign(n_prime=df['score'].apply(nth_prime))
# Show the first ten rows, now including n_prime.
print(df.head(10))

    id                                               text  score  n_prime
0   10  It will help if you give some details of which...      6       13
1   25  infact it does. Look a the first lines of your...     10       29
2   27  "Currently + is implemented using StringBuffer...      7       17
3   41  I don't think that's the magic number he was r...     18       61
4   59  It's still very useful to know that magic numb...     12       37
5   96  This implementation is also nice if you wish t...      9       23
6  108  That's not full text searching, it's searching...      6       13
7  109  That's not full text searching, it's searching...      6       13
8  137  In vim you can open > 1 buffer. :e filename. T...      9       23
9  154  Sure, but what about a solution using O(1) mem...      8       19
CPU times: user 5.42 s, sys: 4.21 ms, total: 5.42 s
Wall time: 5.43 s


In [7]:
%%time

##############################
#     BigFrames World        #
##############################
# Same load as in the pandas cell, but going through a BigFrames session.

import bigframes
# NOTE: this rebinds `pd`, which earlier referred to the pandas module, to the
# object returned by bigframes.connect(). Later cells pass it as `session=pd`.
pd = bigframes.connect()

# The row limit is applied with .head(MAX_ROWS) after read_gbq, rather than
# through a max_results argument as in the pandas cell.
df = pd.read_gbq(TABLE).head(MAX_ROWS)[['id', 'text', 'score']]
# Show the first ten rows as a sanity check.
print(df.head(10))

                id                                               text  score
1042755  103979625  Also, if you've got VS configured to sort usin...      0
1047568   59900587  A clean and maintainable way to calculate this...      0
1060459    2383118   The next question is "what use is this, really?"      1
1065436  111394474  I have finished this by using Nios II, after a...      0
1066265   89483681  This seems to be an XY problem. Why do you wan...      1
1070760   31147709  Ah, but the question is Uri, not URI. People h...      5
1076662   58094982                              Perfect ! Thank you !      0
1078346   80801724  Yes, i need to invoke a partikular function of...      0
1078756   52600144  i am lost, not in sandbox mode, verified domai...      5
1083546   36765535  add it as a possibility and set the default va...      0

[10 rows x 3 columns]
CPU times: user 754 ms, sys: 53.1 ms, total: 807 ms
Wall time: 38.3 s


In [8]:
# The only thing a user has to import is the remote_function decorator, which
# is then applied to their UDF.
from bigframes import remote_function

# Print the decorator's help text, which tells the user what needs to be done
# offline before BigFrames remote functions can be used.
help(remote_function)

Help on function remote_function in module bigframes.remote_function:

remote_function(input_types: 'typing.List[ibis_data_type]', output_type: 'ibis_data_type', session: 'typing.Optional[Session]' = None, bigquery_client: 'typing.Optional[bigquery.Client]' = None, dataset: 'typing.Optional[str]' = None, bigquery_connection: 'typing.Optional[str]' = None, reuse: 'bool' = True)
    Decorator to turn a user defined function into a BigQuery remote function.
    
    Parameters
    ----------
    input_types : list(ibis.expr.datatypes)
        List of input data types in the user defined function.
    output_type : ibis.expr.datatypes
        Data type of the output in the user defined function.
    session : bigframes.Session, Optional
        BigFrames session to use for getting default project, dataset and
        bigquery connection.
    bigquery_client : google.cloud.bigquery.Client, Optional
        Client to use for BigQuery operations. If this param is not provided
        then big

In [9]:
# BigFrames requires user to annotate the udf with input and output data types
# Here we are using ibis datatypes, but ideally we would adapt to go/bigframes-dtypes
import ibis.expr.datatypes as dt

# Dataset in which BigFrames persists the BigQuery function; the user has to
# name one (go/bigframes-transient-data discusses whether this can be avoided).
dataset_id = "my-first-project-331618.bigframes_dev"

# Name of the BigQuery connector used for invoking the cloud function. A data
# scientist may not have the privileges to create and set one up, in which case
# their cloud admin should create it for use with BigFrames remote functions.
bq_connector_name = "bigframes-rf-conn"


In [11]:
# Time this cell by hand: take a perf_counter reading before the decorated
# definition and print the elapsed seconds after it.
from time import perf_counter
start = perf_counter()
#-------------------------------------------------------------------------------------

# User defined function
# Source: https://www.codespeedy.com/find-nth-prime-number-in-python/
#
# nth_prime(n) returns the n-th prime (2 being the first) by trial division of
# successive candidates, or -1 when n is not greater than 0.
# The decorator declares one int64 input and an int64 output, and is given the
# session (`pd`), the dataset and the BigQuery connection set up in earlier cells.
#
# Documentation is kept above the decorator on purpose: a later cell repeats
# this definition verbatim to show that an existing remote function is reused,
# so the function body itself is left untouched.
@remote_function([dt.int64()], dt.int64(), session=pd, dataset=dataset_id, bigquery_connection=bq_connector_name)
def nth_prime(n):
    prime_numbers = [2,3]
    i=3
    if(0<n<=2):
        return prime_numbers[n-1]
    elif(n>2):
        while (True):
            i+=1
            status = True
            for j in range(2,int(i/2)+1):
                if(i%j==0):
                    status = False
                    break
            if(status==True):
                prime_numbers.append(i)
            if(len(prime_numbers)==n):
                break
        return prime_numbers[n-1]
    else:
        return -1

#-------------------------------------------------------------------------------------
# Elapsed wall-clock seconds for applying the decorator above.
print(f"\nWall time: {(perf_counter()-start):.4f} s")

[INFO][2023-05-09 20:47:30,804][bigframes.remote_function] Existing cloud functions


NAME                               STATE   TRIGGER       REGION       ENVIRONMENT
batch-add                          ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-find-team-5toiyt1g       ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-find-team-dbq6wkfr       ACTIVE  HTTP Trigger  us-central1  2nd gen
python-chicago-map-from-web-http   ACTIVE  HTTP Trigger  us-central1  2nd gen
python-chicago-map-hardcoded-http  ACTIVE  HTTP Trigger  us-central1  2nd gen
python-example-udf-num-num-http    ACTIVE  HTTP Trigger  us-central1  2nd gen
python-http-add-one                ACTIVE  HTTP Trigger  us-east1     2nd gen
python-http-function               ACTIVE  HTTP Trigger  us-central1  2nd gen
python-http-function-1             ACTIVE  HTTP Trigger  us-central1  2nd gen
python-mul2-http                   ACTIVE  HTTP Trigger  us-central1  2nd gen
python-multiply-http               ACTIVE  HTTP Trigger  us-central1  2nd gen
python-mysquare-http               ACTIVE  HTTP Trigger  us-

[INFO][2023-05-09 20:47:36,050][bigframes.remote_function] Creating new cloud function: gcloud functions deploy bigframes-nth-prime --gen2 --runtime=python310 --project=my-first-project-331618 --region=us-central1 --source=/tmp/tmpm0jjryey --entry-point=udf_http --trigger-http --no-allow-unauthenticated
Preparing function...
.done.
Deploying function...
[Build]...................................................................................................................................................................................................................................................................................................................................................................................................................................................................done
[Service]............................................................................................................................................................................

buildConfig:
  build: projects/371300922916/locations/us-central1/builds/90cfeb12-bbce-4bcc-a83d-624d220f3484
  entryPoint: udf_http
  runtime: python310
  source:
    storageSource:
      bucket: gcf-v2-sources-371300922916-us-central1
      object: bigframes-nth-prime/function-source.zip
  sourceProvenance:
    resolvedStorageSource:
      bucket: gcf-v2-sources-371300922916-us-central1
      generation: '1683665261099867'
      object: bigframes-nth-prime/function-source.zip
environment: GEN_2
labels:
  deployment-tool: cli-gcloud
name: projects/my-first-project-331618/locations/us-central1/functions/bigframes-nth-prime
serviceConfig:
  allTrafficOnLatestRevision: true
  availableCpu: '0.1666'
  availableMemory: 256M
  ingressSettings: ALLOW_ALL
  maxInstanceCount: 100
  maxInstanceRequestConcurrency: 1
  revision: bigframes-nth-prime-00001-piw
  service: projects/my-first-project-331618/locations/us-central1/services/bigframes-nth-prime
  serviceAccountEmail: 371300922916-compute@d

[INFO][2023-05-09 20:48:45,843][bigframes.remote_function] Existing cloud functions


NAME                               STATE   TRIGGER       REGION       ENVIRONMENT
batch-add                          ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-find-team-5toiyt1g       ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-find-team-dbq6wkfr       ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-nth-prime                ACTIVE  HTTP Trigger  us-central1  2nd gen
python-chicago-map-from-web-http   ACTIVE  HTTP Trigger  us-central1  2nd gen
python-chicago-map-hardcoded-http  ACTIVE  HTTP Trigger  us-central1  2nd gen
python-example-udf-num-num-http    ACTIVE  HTTP Trigger  us-central1  2nd gen
python-http-add-one                ACTIVE  HTTP Trigger  us-east1     2nd gen
python-http-function               ACTIVE  HTTP Trigger  us-central1  2nd gen
python-http-function-1             ACTIVE  HTTP Trigger  us-central1  2nd gen
python-mul2-http                   ACTIVE  HTTP Trigger  us-central1  2nd gen
python-multiply-http               ACTIVE  HTTP Trigger  us-

[INFO][2023-05-09 20:48:49,858][bigframes.remote_function] Successfully created cloud function bigframes-nth-prime with uri (https://bigframes-nth-prime-76sbkhjuiq-uc.a.run.app)
[INFO][2023-05-09 20:48:50,105][bigframes.remote_function] Remote function bigframes_nth_prime already exists.



Wall time: 80.9254 s


In [12]:
%%time

# Same statement as in the pandas cell; here df is the frame loaded through the
# BigFrames session and nth_prime is the remote_function-decorated definition.
df = df.assign(n_prime=df['score'].apply(nth_prime))
# Show the first ten rows, now including n_prime.
print(df.head(10))

                id                                               text  score  \
1042755  103979625  Also, if you've got VS configured to sort usin...      0   
1047568   59900587  A clean and maintainable way to calculate this...      0   
1060459    2383118   The next question is "what use is this, really?"      1   
1065436  111394474  I have finished this by using Nios II, after a...      0   
1066265   89483681  This seems to be an XY problem. Why do you wan...      1   
1070760   31147709  Ah, but the question is Uri, not URI. People h...      5   
1076662   58094982                              Perfect ! Thank you !      0   
1078346   80801724  Yes, i need to invoke a partikular function of...      0   
1078756   52600144  i am lost, not in sandbox mode, verified domai...      5   
1083546   36765535  add it as a possibility and set the default va...      0   

         n_prime  
1042755       -1  
1047568       -1  
1060459        2  
1065436       -1  
1066265        2  
10707

In [13]:
# This time the previously created remote function with the exact same source
# code should be found and reused, so the function below is intentionally a
# verbatim repeat of the earlier decorated definition.

# Time this cell by hand: take a perf_counter reading before the decorated
# definition and print the elapsed seconds after it.
from time import perf_counter
start = perf_counter()
#-------------------------------------------------------------------------------------

# User defined function
# Source: https://www.codespeedy.com/find-nth-prime-number-in-python/
#
# nth_prime(n) returns the n-th prime (2 being the first) by trial division of
# successive candidates, or -1 when n is not greater than 0.
# The decorator declares one int64 input and an int64 output, and is given the
# session (`pd`), the dataset and the BigQuery connection set up in earlier cells.
@remote_function([dt.int64()], dt.int64(), session=pd, dataset=dataset_id, bigquery_connection=bq_connector_name)
def nth_prime(n):
    prime_numbers = [2,3]
    i=3
    if(0<n<=2):
        return prime_numbers[n-1]
    elif(n>2):
        while (True):
            i+=1
            status = True
            for j in range(2,int(i/2)+1):
                if(i%j==0):
                    status = False
                    break
            if(status==True):
                prime_numbers.append(i)
            if(len(prime_numbers)==n):
                break
        return prime_numbers[n-1]
    else:
        return -1

#-------------------------------------------------------------------------------------
# Elapsed wall-clock seconds for applying the decorator above.
print(f"\nWall time: {(perf_counter()-start):.4f} s")

[INFO][2023-05-09 20:52:34,593][bigframes.remote_function] Cloud function bigframes-nth-prime already exists.
[INFO][2023-05-09 20:52:34,761][bigframes.remote_function] Remote function bigframes_nth_prime already exists.



Wall time: 1.6284 s


In [15]:
%%time

# Usage of the remote function continues to work after the re-definition:
# the statement is the same one used in the earlier apply cells.

df = df.assign(n_prime=df['score'].apply(nth_prime))
# Show the first ten rows, including n_prime.
print(df.head(10))

                id                                               text  score  \
1042755  103979625  Also, if you've got VS configured to sort usin...      0   
1047568   59900587  A clean and maintainable way to calculate this...      0   
1060459    2383118   The next question is "what use is this, really?"      1   
1065436  111394474  I have finished this by using Nios II, after a...      0   
1066265   89483681  This seems to be an XY problem. Why do you wan...      1   
1070760   31147709  Ah, but the question is Uri, not URI. People h...      5   
1076662   58094982                              Perfect ! Thank you !      0   
1078346   80801724  Yes, i need to invoke a partikular function of...      0   
1078756   52600144  i am lost, not in sandbox mode, verified domai...      5   
1083546   36765535  add it as a possibility and set the default va...      0   

         n_prime  
1042755       -1  
1047568       -1  
1060459        2  
1065436       -1  
1066265        2  
10707