In [1]:
# Source table and the row cap shared by both the pandas and the
# BigQuery DataFrames runs below.
TABLE = "bigquery-public-data.stackoverflow.comments"
MAX_ROWS = 1_000_000  # one million rows

In [2]:
%%time

##############################
#       Pandas World         #
##############################

import pandas as pd
df = pd.read_gbq(TABLE, max_results=MAX_ROWS)[['id', 'text', 'score']]
df.head(10)

CPU times: user 30.5 s, sys: 4.25 s, total: 34.8 s
Wall time: 1min 49s


Unnamed: 0,id,text,score
0,10,It will help if you give some details of which...,6
1,25,infact it does. Look a the first lines of your...,10
2,27,"""Currently + is implemented using StringBuffer...",7
3,41,I don't think that's the magic number he was r...,18
4,59,It's still very useful to know that magic numb...,12
5,96,This implementation is also nice if you wish t...,9
6,108,"That's not full text searching, it's searching...",6
7,109,"That's not full text searching, it's searching...",6
8,137,In vim you can open > 1 buffer. :e filename. T...,9
9,154,"Sure, but what about a solution using O(1) mem...",8


In [3]:
# User defined function: n-th prime by plain trial division
# https://www.codespeedy.com/find-nth-prime-number-in-python/
def nth_prime(n):
    """Return the n-th prime, counting from nth_prime(1) == 2.

    The first two primes are seeded; further primes are discovered by testing
    each successive integer against every divisor from 2 up to half of it.
    Returns -1 for n <= 0.
    """
    found = [2, 3]
    if n > 2:
        candidate = 3
        # Keep extending the list until it holds exactly n primes.
        while len(found) != n:
            candidate += 1
            # A zero remainder is falsy, so all() stops at the first divisor.
            if all(candidate % divisor
                   for divisor in range(2, int(candidate / 2) + 1)):
                found.append(candidate)
        return found[n - 1]
    if 0 < n <= 2:
        return found[n - 1]
    return -1

In [4]:
%%time

df = df.assign(n_prime=df['score'].apply(nth_prime))
df.head(10)

CPU times: user 5.48 s, sys: 27.6 ms, total: 5.51 s
Wall time: 5.49 s


Unnamed: 0,id,text,score,n_prime
0,10,It will help if you give some details of which...,6,13
1,25,infact it does. Look a the first lines of your...,10,29
2,27,"""Currently + is implemented using StringBuffer...",7,17
3,41,I don't think that's the magic number he was r...,18,61
4,59,It's still very useful to know that magic numb...,12,37
5,96,This implementation is also nice if you wish t...,9,23
6,108,"That's not full text searching, it's searching...",6,13
7,109,"That's not full text searching, it's searching...",6,13
8,137,In vim you can open > 1 buffer. :e filename. T...,9,23
9,154,"Sure, but what about a solution using O(1) mem...",8,19


In [5]:
%%time

##############################
#     BigQuery DataFrames     #
##############################

import bigframes.pandas as pd

df = pd.read_gbq(TABLE).head(MAX_ROWS)[['id', 'text', 'score']]
df.head(10)

CPU times: user 835 ms, sys: 62.5 ms, total: 897 ms
Wall time: 24.1 s


Unnamed: 0,id,text,score
0,35153602,Are you trying to access a nested array?,0
1,35156124,Sorry I didn't include my timeout method befor...,0
2,35157401,As soon as I defined some sort of primary Key ...,0
3,35158649,@user3355243 I've edited it to give $values an...,0
4,35162039,I pasted my exes @Matt.,0
5,35162396,"@Gene - I do have separate fields, but I also ...",0
6,35162907,could you please provide any kind of sketch ho...,0
7,35166498,We use PhoneGap. Using Angular.,0
8,35170138,`decltype((int(Foo::*)(int))(&Foo::foo))` That...,0
9,35172348,"Yes, I found that one already. And I understan...",0


In [6]:
# Print the documentation of remote_function so the user can see what needs to
# be set up offline before using BigQuery DataFrames remote functions
# (`pd` here is bigframes.pandas, imported in the previous cell).
help(pd.remote_function)

Help on function remote_function in module bigframes.pandas:

remote_function(input_types: 'List[type]', output_type: 'type', dataset: 'Optional[str]' = None, bigquery_connection: 'Optional[str]' = None, reuse: 'bool' = True)
    Decorator to turn a user defined function into a BigQuery remote function.
    
    Args:
        input_types : list(type)
            List of input data types in the user defined function.
        output_type : type
            Data type of the output in the user defined function.
        dataset : str, Optional
            Dataset to use to create a BigQuery function. It should be in
            `<project_id>.<dataset_name>` or `<dataset_name>` format. If this
            param is not provided then session dataset id would be used.
        bigquery_connection : str, Optional
            Name of the BigQuery connection. If it is pre created in the same
            location as the `bigquery_client.location` then it would be used,
            otherwise it would

In [7]:
# Remote functions rely on a BigQuery connection that is set up for invoking
# a cloud function. A BigQuery DataFrames user is typically a data scientist
# who may not have the privileges to create such a connection; in that case
# they should have their cloud admin create one and pass its name here. If the
# name given does not exist, BigQuery DataFrames will try to create it on the
# fly, which assumes the user has sufficient privileges.
bq_connection_name = "bigframes-rf-conn"

In [8]:
from time import perf_counter
start = perf_counter()
#-------------------------------------------------------------------------------------

# User defined function, turned into a BigQuery remote function by the
# decorator (int input, int output, using the connection named above)
# https://www.codespeedy.com/find-nth-prime-number-in-python/
@pd.remote_function([int], int, bigquery_connection=bq_connection_name)
def nth_prime(n):
    """Return the n-th prime, counting from nth_prime(1) == 2.

    The first two primes are seeded; further primes are discovered by testing
    each successive integer against every divisor from 2 up to half of it.
    Returns -1 for n <= 0.
    """
    found = [2, 3]
    if n > 2:
        candidate = 3
        # Keep extending the list until it holds exactly n primes.
        while len(found) != n:
            candidate += 1
            # A zero remainder is falsy, so all() stops at the first divisor.
            if all(candidate % divisor
                   for divisor in range(2, int(candidate / 2) + 1)):
                found.append(candidate)
        return found[n - 1]
    if 0 < n <= 2:
        return found[n - 1]
    return -1

#-------------------------------------------------------------------------------------
# Elapsed time covers everything the decorator did while defining the function.
print(f"\nWall time: {(perf_counter()-start):.4f} s")

[INFO][2023-06-29 01:03:30,557][bigframes.remote_function] Creating new cloud function: gcloud functions deploy bigframes-ac72c931423f68dddb4f84f6754e2b28 --gen2 --runtime=python310 --project=bigframes-dev --region=us-central1 --source=/tmp/tmposspvw3v --entry-point=udf_http --trigger-http --no-allow-unauthenticated
Preparing function...
.done.
Deploying function...
[Build]......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................done
[Service]....................................................................................................done
Done.
You can view 

buildConfig:
  build: projects/1084210331973/locations/us-central1/builds/57859622-4287-47bc-ab86-3cd3f4c47548
  entryPoint: udf_http
  runtime: python310
  source:
    storageSource:
      bucket: gcf-v2-sources-1084210331973-us-central1
      object: bigframes-ac72c931423f68dddb4f84f6754e2b28/function-source.zip
  sourceProvenance:
    resolvedStorageSource:
      bucket: gcf-v2-sources-1084210331973-us-central1
      generation: '1688000614202977'
      object: bigframes-ac72c931423f68dddb4f84f6754e2b28/function-source.zip
environment: GEN_2
labels:
  deployment-tool: cli-gcloud
name: projects/bigframes-dev/locations/us-central1/functions/bigframes-ac72c931423f68dddb4f84f6754e2b28
serviceConfig:
  allTrafficOnLatestRevision: true
  availableCpu: '0.1666'
  availableMemory: 256M
  ingressSettings: ALLOW_ALL
  maxInstanceCount: 100
  maxInstanceRequestConcurrency: 1
  revision: bigframes-ac72c931423f68dddb4f84f6754e2b28-00001-faf
  service: projects/bigframes-dev/locations/us-central1

[INFO][2023-06-29 01:04:37,239][bigframes.remote_function] Successfully created cloud function bigframes-ac72c931423f68dddb4f84f6754e2b28 with uri (https://bigframes-ac72c931423f68dddb4f84f6754e2b28-7krlje3eoq-uc.a.run.app)
[INFO][2023-06-29 01:04:42,402][bigframes.remote_function] Connector bigframes-rf-conn already exists
[INFO][2023-06-29 01:04:42,404][bigframes.remote_function] Creating BQ remote function: 
    CREATE OR REPLACE FUNCTION `bigframes-dev.bigframes_temp_us`.bigframes_ac72c931423f68dddb4f84f6754e2b28(n INT64)
    RETURNS INT64
    REMOTE WITH CONNECTION `bigframes-dev.us.bigframes-rf-conn`
    OPTIONS (
      endpoint = "https://bigframes-ac72c931423f68dddb4f84f6754e2b28-7krlje3eoq-uc.a.run.app"
    )
[INFO][2023-06-29 01:04:43,456][bigframes.remote_function] Created remote function bigframes-dev.bigframes_temp_us.bigframes_ac72c931423f68dddb4f84f6754e2b28



Wall time: 76.2659 s


In [9]:
%%time

# Same assign/apply expression as in the pandas run, but here df is the
# BigQuery DataFrames frame and nth_prime is the remote function declared in
# the previous cell.
# NOTE(review): every previewed row has score 0, so each shown n_prime is -1;
# confirm the sample actually exercises the prime search before comparing
# this timing with the pandas one.
df = df.assign(n_prime=df['score'].apply(nth_prime))
df.head(10)

CPU times: user 22.2 ms, sys: 0 ns, total: 22.2 ms
Wall time: 22.4 ms


Unnamed: 0,id,text,score,n_prime
0,35153602,Are you trying to access a nested array?,0,-1
1,35156124,Sorry I didn't include my timeout method befor...,0,-1
2,35157401,As soon as I defined some sort of primary Key ...,0,-1
3,35158649,@user3355243 I've edited it to give $values an...,0,-1
4,35162039,I pasted my exes @Matt.,0,-1
5,35162396,"@Gene - I do have separate fields, but I also ...",0,-1
6,35162907,could you please provide any kind of sketch ho...,0,-1
7,35166498,We use PhoneGap. Using Angular.,0,-1
8,35170138,`decltype((int(Foo::*)(int))(&Foo::foo))` That...,0,-1
9,35172348,"Yes, I found that one already. And I understan...",0,-1
