In [1]:
# Fully qualified BigQuery table read by both the pandas and the BigFrames cells.
TABLE = 'bigquery-public-data.stackoverflow.comments'
# Upper bound on the number of rows loaded into each DataFrame (one million).
MAX_ROWS = 1_000_000

In [2]:
%%time

##############################
#       Pandas World         #
##############################

import pandas as pd

# Read at most MAX_ROWS rows of TABLE into a local pandas DataFrame, then keep
# only the three columns used by the rest of the notebook.
wanted_columns = ['id', 'text', 'score']
df = pd.read_gbq(TABLE, max_results=MAX_ROWS)[wanted_columns]
print(df.head(10))

    id                                               text  score
0   10  It will help if you give some details of which...      6
1   25  infact it does. Look a the first lines of your...     10
2   27  "Currently + is implemented using StringBuffer...      7
3   41  I don't think that's the magic number he was r...     18
4   59  It's still very useful to know that magic numb...     12
5   96  This implementation is also nice if you wish t...      9
6  108  That's not full text searching, it's searching...      6
7  109  That's not full text searching, it's searching...      6
8  137  In vim you can open > 1 buffer. :e filename. T...      9
9  154  Sure, but what about a solution using O(1) mem...      8
CPU times: user 28.9 s, sys: 5.24 s, total: 34.2 s
Wall time: 1min 37s


In [3]:
# User defined function
# https://www.codespeedy.com/find-nth-prime-number-in-python/
def nth_prime(n):
    """Return the n-th prime number (1-indexed), or -1 when n is not positive.

    Examples: nth_prime(1) == 2, nth_prime(2) == 3, nth_prime(6) == 13.

    Args:
        n: Position of the requested prime; expected to be an integer.

    Returns:
        The n-th prime as an int, or the sentinel -1 for n <= 0.
    """
    # `not n > 0` (rather than `n <= 0`) keeps the sentinel for any value that
    # does not compare greater than zero.
    if not n > 0:
        return -1

    primes = [2, 3]
    candidate = 3
    while len(primes) < n:
        candidate += 2  # every even number above 2 is composite, so skip them
        is_prime = True
        # Only already-found primes up to sqrt(candidate) can be the smallest
        # divisor; integer arithmetic avoids the float rounding of int(i/2).
        for p in primes:
            if p * p > candidate:
                break
            if candidate % p == 0:
                is_prime = False
                break
        if is_prime:
            primes.append(candidate)
    return primes[n - 1]

In [4]:
%%time

# Map every comment's score to the score-th prime and attach the result as a new
# `n_prime` column (nth_prime returns -1 for scores that are not positive).
n_prime_values = df['score'].apply(nth_prime)
df = df.assign(n_prime=n_prime_values)
print(df.head(10))

    id                                               text  score  n_prime
0   10  It will help if you give some details of which...      6       13
1   25  infact it does. Look a the first lines of your...     10       29
2   27  "Currently + is implemented using StringBuffer...      7       17
3   41  I don't think that's the magic number he was r...     18       61
4   59  It's still very useful to know that magic numb...     12       37
5   96  This implementation is also nice if you wish t...      9       23
6  108  That's not full text searching, it's searching...      6       13
7  109  That's not full text searching, it's searching...      6       13
8  137  In vim you can open > 1 buffer. :e filename. T...      9       23
9  154  Sure, but what about a solution using O(1) mem...      8       19
CPU times: user 5.31 s, sys: 38.7 ms, total: 5.35 s
Wall time: 5.34 s


In [5]:
%%time

##############################
#     BigFrames World        #
##############################

import bigframes

# NOTE: this rebinds `pd` (the pandas module in the cells above) to a BigFrames
# session, so the pandas-style calls below go through BigFrames. Later cells
# pass this object as `session=pd`.
pd = bigframes.connect()

# Same three columns as in the pandas section, limited to the first MAX_ROWS rows.
wanted_columns = ['id', 'text', 'score']
df = pd.read_gbq(TABLE).head(MAX_ROWS)[wanted_columns]
print(df.head(10))

               id                                               text  score
340      78648826  @richardb: that's because you're using order=7...      0
2002     84989443  I believe that's trying to find the Ruby "ruby...      0
31800    53115380  Can you confirm that you don't see this issue ...      0
41033    34963916  Not should, must. You can not create managed o...      0
79660   119256298  @AbhishekMane - It's a generic frog -- a super...      1
101604   91852998  ```'branch_name'=>'required|max:255|unique:bra...      0
117199    4483403  I've experimented with things that ended up pr...      0
143485   24460060  @sadmicrowave could you provide how do you cre...      0
159450  104558415  I hate that this solved my problem, because it...      0
174563   90970217  That is right, but he is conditionally adding ...      0

[10 rows x 3 columns]
CPU times: user 796 ms, sys: 163 ms, total: 959 ms
Wall time: 4min 49s


In [6]:
# The user only needs to import the remote_function decorator and apply it to their UDF
from bigframes import remote_function

# Show the decorator's documentation, which tells the user what needs to be set up
# offline before using BigFrames remote functions
help(remote_function)

Help on function remote_function in module bigframes.remote_function:

remote_function(input_types: 'typing.Sequence[type]', output_type: 'type', session: 'typing.Optional[Session]' = None, bigquery_client: 'typing.Optional[bigquery.Client]' = None, dataset: 'typing.Optional[str]' = None, bigquery_connection: 'typing.Optional[str]' = None, reuse: 'bool' = True)
    Decorator to turn a user defined function into a BigQuery remote function.
    
    Args:
        input_types : list(type)
            List of input data types in the user defined function.
        output_type : type
            Data type of the output in the user defined function.
        session : bigframes.Session, Optional
            BigFrames session to use for getting default project, dataset and
            bigquery connection.
        bigquery_client : google.cloud.bigquery.Client, Optional
            Client to use for BigQuery operations. If this param is not provided
            then bigquery client from the sess

In [7]:
# BigFrames requires the user to specify a dataset in which the BigQuery remote
# function will be persisted (given here as `<project>.<dataset>`).
# Whether this requirement can be avoided is being discussed in go/bigframes-transient-data
dataset_id = 'my-first-project-331618.bigframes_dev'

# A BigFrames user is a data scientist and may not have the privileges to create a
# BigQuery connection and set it up for invoking a cloud function. They should ask
# their cloud admin to create such a connection and then pass its name to
# BigFrames remote functions.
bq_connector_name = 'bigframes-rf-conn'

In [8]:
# Time the whole declaration step by hand with perf_counter; the elapsed wall time
# is printed by the last line of this cell.
from time import perf_counter
start = perf_counter()
#-------------------------------------------------------------------------------------

# User defined function
# https://www.codespeedy.com/find-nth-prime-number-in-python/
#
# The decorator is applied when this cell runs. Per the log output below, it deploys
# the function as a cloud function and creates a BigQuery remote function inside
# `dataset_id` that reaches it through the `bq_connector_name` connection.
# `[int]` and `int` declare the UDF's input types and output type.
#
# NOTE(review): the function body is intentionally left untouched. A later cell
# re-declares it expecting "the exact same source code", and the deployed function
# name looks like it is derived from the source - keep both copies in sync.
@remote_function([int], int, session=pd, dataset=dataset_id, bigquery_connection=bq_connector_name)
def nth_prime(n):
    prime_numbers = [2,3]
    i=3
    if(0<n<=2):
        return prime_numbers[n-1]
    elif(n>2):
        while (True):
            i+=1
            status = True
            for j in range(2,int(i/2)+1):
                if(i%j==0):
                    status = False
                    break
            if(status==True):
                prime_numbers.append(i)
            if(len(prime_numbers)==n):
                break
        return prime_numbers[n-1]
    else:
        return -1

#-------------------------------------------------------------------------------------
# Report how long the declaration (including any deployment) took.
print(f"\nWall time: {(perf_counter()-start):.4f} s")

[INFO][2023-05-16 17:35:40,162][bigframes.remote_function] Existing cloud functions


NAME                               STATE   TRIGGER       REGION       ENVIRONMENT
batch-add                          ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-find-team-5toiyt1g       ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-find-team-dbq6wkfr       ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-nth-prime                ACTIVE  HTTP Trigger  us-central1  2nd gen
python-chicago-map-from-web-http   ACTIVE  HTTP Trigger  us-central1  2nd gen
python-chicago-map-hardcoded-http  ACTIVE  HTTP Trigger  us-central1  2nd gen
python-example-udf-num-num-http    ACTIVE  HTTP Trigger  us-central1  2nd gen
python-http-add-one                ACTIVE  HTTP Trigger  us-east1     2nd gen
python-http-function               ACTIVE  HTTP Trigger  us-central1  2nd gen
python-http-function-1             ACTIVE  HTTP Trigger  us-central1  2nd gen
python-mul2-http                   ACTIVE  HTTP Trigger  us-central1  2nd gen
python-multiply-http               ACTIVE  HTTP Trigger  us-

[INFO][2023-05-16 17:35:44,339][bigframes.remote_function] Creating new cloud function: gcloud functions deploy bigframes-1fa7febb712b7e7f9f4cb62ac3175d04 --gen2 --runtime=python310 --project=my-first-project-331618 --region=us-central1 --source=/tmp/tmpdz30hn5u --entry-point=udf_http --trigger-http --no-allow-unauthenticated
Preparing function...
.done.
Deploying function...
[Build]..................................................................................................................................................................................................................................................................................................................................................................................................................................done
[Service].....................................................................................................................................................done
Done.
You can view your func

buildConfig:
  build: projects/371300922916/locations/us-central1/builds/45faa208-c7eb-4ade-a349-4f3f2e91609e
  entryPoint: udf_http
  runtime: python310
  source:
    storageSource:
      bucket: gcf-v2-sources-371300922916-us-central1
      object: bigframes-1fa7febb712b7e7f9f4cb62ac3175d04/function-source.zip
  sourceProvenance:
    resolvedStorageSource:
      bucket: gcf-v2-sources-371300922916-us-central1
      generation: '1684258547710868'
      object: bigframes-1fa7febb712b7e7f9f4cb62ac3175d04/function-source.zip
environment: GEN_2
labels:
  deployment-tool: cli-gcloud
name: projects/my-first-project-331618/locations/us-central1/functions/bigframes-1fa7febb712b7e7f9f4cb62ac3175d04
serviceConfig:
  allTrafficOnLatestRevision: true
  availableCpu: '0.1666'
  availableMemory: 256M
  ingressSettings: ALLOW_ALL
  maxInstanceCount: 100
  maxInstanceRequestConcurrency: 1
  revision: bigframes-1fa7febb712b7e7f9f4cb62ac3175d04-00001-lel
  service: projects/my-first-project-331618/loca

[INFO][2023-05-16 17:36:46,533][bigframes.remote_function] Existing cloud functions


NAME                                        STATE   TRIGGER       REGION       ENVIRONMENT
batch-add                                   ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-1fa7febb712b7e7f9f4cb62ac3175d04  ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-find-team-5toiyt1g                ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-find-team-dbq6wkfr                ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-nth-prime                         ACTIVE  HTTP Trigger  us-central1  2nd gen
python-chicago-map-from-web-http            ACTIVE  HTTP Trigger  us-central1  2nd gen
python-chicago-map-hardcoded-http           ACTIVE  HTTP Trigger  us-central1  2nd gen
python-example-udf-num-num-http             ACTIVE  HTTP Trigger  us-central1  2nd gen
python-http-add-one                         ACTIVE  HTTP Trigger  us-east1     2nd gen
python-http-function                        ACTIVE  HTTP Trigger  us-central1  2nd gen
python-http-function-1                 

[INFO][2023-05-16 17:36:50,310][bigframes.remote_function] Successfully created cloud function bigframes-1fa7febb712b7e7f9f4cb62ac3175d04 with uri (https://bigframes-1fa7febb712b7e7f9f4cb62ac3175d04-76sbkhjuiq-uc.a.run.app)
[INFO][2023-05-16 17:36:50,513][bigframes.remote_function] List of existing connections
  import imp
I0516 17:36:51.718043 140263356673088 bigquery_client.py:730] There is no apilog flag so non-critical logging is disabled.


                    name                     friendlyName   description    Last modified         type        hasCredential                                            properties                                            
 ------------------------------------------ -------------- ------------- ----------------- ---------------- --------------- ----------------------------------------------------------------------------------------------- 
  371300922916.us.batch-add-conn                                          01 Mar 09:50:24   CLOUD_RESOURCE   False           {"serviceAccountId": "bqcx-371300922916-yrnm@gcp-sa-bigquery-condel.iam.gserviceaccount.com"}  
  371300922916.us.bigframes-rf-conn                                       08 Mar 08:28:37   CLOUD_RESOURCE   False           {"serviceAccountId": "bqcx-371300922916-x5q0@gcp-sa-bigquery-condel.iam.gserviceaccount.com"}  
  371300922916.us.python-mul2-http-conn                                   07 Mar 08:11:25   CLOUD_RESOURCE   False  

[INFO][2023-05-16 17:36:53,893][bigframes.remote_function] Connector bigframes-rf-conn already exists
[INFO][2023-05-16 17:36:53,895][bigframes.remote_function] Creating BQ remote function: bq query --use_legacy_sql=false '
    CREATE OR REPLACE FUNCTION `my-first-project-331618.bigframes_dev`.bigframes_1fa7febb712b7e7f9f4cb62ac3175d04(n INT64)
    RETURNS INT64
    REMOTE WITH CONNECTION `my-first-project-331618.us.bigframes-rf-conn`
    OPTIONS (
      endpoint = "https://bigframes-1fa7febb712b7e7f9f4cb62ac3175d04-76sbkhjuiq-uc.a.run.app"
    )'
  import imp
I0516 17:36:55.045917 139711291608128 bigquery_client.py:730] There is no apilog flag so non-critical logging is disabled.
Waiting on bqjob_r227b1308df1d8f21_0000018825a2ef09_1 ... (0s) Current status: DONE   


Created my-first-project-331618.bigframes_dev.bigframes_1fa7febb712b7e7f9f4cb62ac3175d04


[INFO][2023-05-16 17:36:57,200][bigframes.remote_function] Created remote function `my-first-project-331618.bigframes_dev`.bigframes_1fa7febb712b7e7f9f4cb62ac3175d04



Wall time: 78.7236 s


In [9]:
%%time

# Same call as in the pandas section; `nth_prime` now refers to the remote
# function declared above and `df` is the BigFrames DataFrame.
n_prime_values = df['score'].apply(nth_prime)
df = df.assign(n_prime=n_prime_values)
print(df.head(10))

               id                                               text  score  \
340      78648826  @richardb: that's because you're using order=7...      0   
2002     84989443  I believe that's trying to find the Ruby "ruby...      0   
31800    53115380  Can you confirm that you don't see this issue ...      0   
41033    34963916  Not should, must. You can not create managed o...      0   
79660   119256298  @AbhishekMane - It's a generic frog -- a super...      1   
101604   91852998  ```'branch_name'=>'required|max:255|unique:bra...      0   
117199    4483403  I've experimented with things that ended up pr...      0   
143485   24460060  @sadmicrowave could you provide how do you cre...      0   
159450  104558415  I hate that this solved my problem, because it...      0   
174563   90970217  That is right, but he is conditionally adding ...      0   

        n_prime  
340          -1  
2002         -1  
31800        -1  
41033        -1  
79660         2  
101604       -1  
1171

In [10]:
# This time the previously created remote function with the exact same source
# code should be found and reused.
#
# NOTE(review): the recorded output of this cell does not show a reuse. It logs
# "Creating new cloud function" for bigframes-afe029771fe092e5dfde933dcd19b1ea,
# whereas the earlier declaration produced bigframes-1fa7febb712b7e7f9f4cb62ac3175d04,
# and the wall time is again about 70 s. Confirm what the reuse lookup keys on
# before relying on this behaviour.

from time import perf_counter
start = perf_counter()
#-------------------------------------------------------------------------------------

# User defined function
# https://www.codespeedy.com/find-nth-prime-number-in-python/
# The body below must stay identical to the earlier declaration for the reuse
# expectation stated at the top of this cell to be meaningful.
@remote_function([int], int, session=pd, dataset=dataset_id, bigquery_connection=bq_connector_name)
def nth_prime(n):
    prime_numbers = [2,3]
    i=3
    if(0<n<=2):
        return prime_numbers[n-1]
    elif(n>2):
        while (True):
            i+=1
            status = True
            for j in range(2,int(i/2)+1):
                if(i%j==0):
                    status = False
                    break
            if(status==True):
                prime_numbers.append(i)
            if(len(prime_numbers)==n):
                break
        return prime_numbers[n-1]
    else:
        return -1

#-------------------------------------------------------------------------------------
# Report how long the re-declaration took.
print(f"\nWall time: {(perf_counter()-start):.4f} s")

[INFO][2023-05-16 17:37:05,234][bigframes.remote_function] Existing cloud functions


NAME                                        STATE   TRIGGER       REGION       ENVIRONMENT
batch-add                                   ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-1fa7febb712b7e7f9f4cb62ac3175d04  ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-find-team-5toiyt1g                ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-find-team-dbq6wkfr                ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-nth-prime                         ACTIVE  HTTP Trigger  us-central1  2nd gen
python-chicago-map-from-web-http            ACTIVE  HTTP Trigger  us-central1  2nd gen
python-chicago-map-hardcoded-http           ACTIVE  HTTP Trigger  us-central1  2nd gen
python-example-udf-num-num-http             ACTIVE  HTTP Trigger  us-central1  2nd gen
python-http-add-one                         ACTIVE  HTTP Trigger  us-east1     2nd gen
python-http-function                        ACTIVE  HTTP Trigger  us-central1  2nd gen
python-http-function-1                 

[INFO][2023-05-16 17:37:08,816][bigframes.remote_function] Creating new cloud function: gcloud functions deploy bigframes-afe029771fe092e5dfde933dcd19b1ea --gen2 --runtime=python310 --project=my-first-project-331618 --region=us-central1 --source=/tmp/tmpneh0kp3u --entry-point=udf_http --trigger-http --no-allow-unauthenticated
Preparing function...
............done.
Deploying function...
[Build]......................................................................................................................................................................................................................................................................................................................................................................................................................................done
[Service]..................................................................................done
Done.
You can view your function in the Cloud Console here: https://console.clou

buildConfig:
  build: projects/371300922916/locations/us-central1/builds/7942886a-6670-4836-bd6a-eceaaedd6dab
  entryPoint: udf_http
  runtime: python310
  source:
    storageSource:
      bucket: gcf-v2-sources-371300922916-us-central1
      object: bigframes-afe029771fe092e5dfde933dcd19b1ea/function-source.zip
  sourceProvenance:
    resolvedStorageSource:
      bucket: gcf-v2-sources-371300922916-us-central1
      generation: '1684258631623980'
      object: bigframes-afe029771fe092e5dfde933dcd19b1ea/function-source.zip
environment: GEN_2
labels:
  deployment-tool: cli-gcloud
name: projects/my-first-project-331618/locations/us-central1/functions/bigframes-afe029771fe092e5dfde933dcd19b1ea
serviceConfig:
  allTrafficOnLatestRevision: true
  availableCpu: '0.1666'
  availableMemory: 256M
  ingressSettings: ALLOW_ALL
  maxInstanceCount: 100
  maxInstanceRequestConcurrency: 1
  revision: bigframes-afe029771fe092e5dfde933dcd19b1ea-00001-lap
  service: projects/my-first-project-331618/loca

[INFO][2023-05-16 17:38:04,897][bigframes.remote_function] Existing cloud functions


NAME                                        STATE   TRIGGER       REGION       ENVIRONMENT
batch-add                                   ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-1fa7febb712b7e7f9f4cb62ac3175d04  ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-afe029771fe092e5dfde933dcd19b1ea  ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-find-team-5toiyt1g                ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-find-team-dbq6wkfr                ACTIVE  HTTP Trigger  us-central1  2nd gen
bigframes-nth-prime                         ACTIVE  HTTP Trigger  us-central1  2nd gen
python-chicago-map-from-web-http            ACTIVE  HTTP Trigger  us-central1  2nd gen
python-chicago-map-hardcoded-http           ACTIVE  HTTP Trigger  us-central1  2nd gen
python-example-udf-num-num-http             ACTIVE  HTTP Trigger  us-central1  2nd gen
python-http-add-one                         ACTIVE  HTTP Trigger  us-east1     2nd gen
python-http-function                   

[INFO][2023-05-16 17:38:08,819][bigframes.remote_function] Successfully created cloud function bigframes-afe029771fe092e5dfde933dcd19b1ea with uri (https://bigframes-afe029771fe092e5dfde933dcd19b1ea-76sbkhjuiq-uc.a.run.app)
[INFO][2023-05-16 17:38:08,994][bigframes.remote_function] List of existing connections
  import imp
I0516 17:38:10.132915 140438353005632 bigquery_client.py:730] There is no apilog flag so non-critical logging is disabled.


                    name                     friendlyName   description    Last modified         type        hasCredential                                            properties                                            
 ------------------------------------------ -------------- ------------- ----------------- ---------------- --------------- ----------------------------------------------------------------------------------------------- 
  371300922916.us.batch-add-conn                                          01 Mar 09:50:24   CLOUD_RESOURCE   False           {"serviceAccountId": "bqcx-371300922916-yrnm@gcp-sa-bigquery-condel.iam.gserviceaccount.com"}  
  371300922916.us.bigframes-rf-conn                                       08 Mar 08:28:37   CLOUD_RESOURCE   False           {"serviceAccountId": "bqcx-371300922916-x5q0@gcp-sa-bigquery-condel.iam.gserviceaccount.com"}  
  371300922916.us.python-mul2-http-conn                                   07 Mar 08:11:25   CLOUD_RESOURCE   False  

[INFO][2023-05-16 17:38:12,159][bigframes.remote_function] Connector bigframes-rf-conn already exists
[INFO][2023-05-16 17:38:12,161][bigframes.remote_function] Creating BQ remote function: bq query --use_legacy_sql=false '
    CREATE OR REPLACE FUNCTION `my-first-project-331618.bigframes_dev`.bigframes_afe029771fe092e5dfde933dcd19b1ea(n INT64)
    RETURNS INT64
    REMOTE WITH CONNECTION `my-first-project-331618.us.bigframes-rf-conn`
    OPTIONS (
      endpoint = "https://bigframes-afe029771fe092e5dfde933dcd19b1ea-76sbkhjuiq-uc.a.run.app"
    )'
  import imp
I0516 17:38:13.306264 140291393309760 bigquery_client.py:730] There is no apilog flag so non-critical logging is disabled.
Waiting on bqjob_r17cfbf368ee69a54_0000018825a420be_1 ... (0s) Current status: DONE   


Created my-first-project-331618.bigframes_dev.bigframes_afe029771fe092e5dfde933dcd19b1ea


[INFO][2023-05-16 17:38:15,391][bigframes.remote_function] Created remote function `my-first-project-331618.bigframes_dev`.bigframes_afe029771fe092e5dfde933dcd19b1ea



Wall time: 71.6585 s


In [11]:
%%time

# Usage of the remote function continues to work

# Recompute the `n_prime` column with the re-declared remote function; assigning
# the same column name again replaces the values computed earlier.
n_prime_values = df['score'].apply(nth_prime)
df = df.assign(n_prime=n_prime_values)
print(df.head(10))

               id                                               text  score  \
340      78648826  @richardb: that's because you're using order=7...      0   
2002     84989443  I believe that's trying to find the Ruby "ruby...      0   
31800    53115380  Can you confirm that you don't see this issue ...      0   
41033    34963916  Not should, must. You can not create managed o...      0   
79660   119256298  @AbhishekMane - It's a generic frog -- a super...      1   
101604   91852998  ```'branch_name'=>'required|max:255|unique:bra...      0   
117199    4483403  I've experimented with things that ended up pr...      0   
143485   24460060  @sadmicrowave could you provide how do you cre...      0   
159450  104558415  I hate that this solved my problem, because it...      0   
174563   90970217  That is right, but he is conditionally adding ...      0   

        n_prime  
340          -1  
2002         -1  
31800        -1  
41033        -1  
79660         2  
101604       -1  
1171