In [33]:
TABLE='bigquery-public-data.stackoverflow.comments'
MAX_ROWS=1000000 # 1 Million

In [34]:
%%time

##############################
#       Pandas World         #
##############################

import pandas as pd
df = pd.read_gbq(TABLE, max_results=MAX_ROWS)[['id', 'text', 'score']]
df.head(10)

[INFO][2023-06-23 21:06:20,395][pandas_gbq.gbq] Total time taken 104.5 s.
Finished at 2023-06-23 21:06:20.


CPU times: user 27.8 s, sys: 2.76 s, total: 30.5 s
Wall time: 1min 44s


Unnamed: 0,id,text,score
0,10,It will help if you give some details of which...,6
1,25,infact it does. Look a the first lines of your...,10
2,27,"""Currently + is implemented using StringBuffer...",7
3,41,I don't think that's the magic number he was r...,18
4,59,It's still very useful to know that magic numb...,12
5,96,This implementation is also nice if you wish t...,9
6,108,"That's not full text searching, it's searching...",6
7,109,"That's not full text searching, it's searching...",6
8,137,In vim you can open > 1 buffer. :e filename. T...,9
9,154,"Sure, but what about a solution using O(1) mem...",8


In [35]:
# User defined function
# https://www.codespeedy.com/find-nth-prime-number-in-python/
def nth_prime(n):
    prime_numbers = [2,3]
    i=3
    if(0<n<=2):
        return prime_numbers[n-1]
    elif(n>2):
        while (True):
            i+=1
            status = True
            for j in range(2,int(i/2)+1):
                if(i%j==0):
                    status = False
                    break
            if(status==True):
                prime_numbers.append(i)
            if(len(prime_numbers)==n):
                break
        return prime_numbers[n-1]
    else:
        return -1

In [36]:
%%time

df = df.assign(n_prime=df['score'].apply(nth_prime))
df.head(10)

CPU times: user 5.91 s, sys: 0 ns, total: 5.91 s
Wall time: 5.89 s


Unnamed: 0,id,text,score,n_prime
0,10,It will help if you give some details of which...,6,13
1,25,infact it does. Look a the first lines of your...,10,29
2,27,"""Currently + is implemented using StringBuffer...",7,17
3,41,I don't think that's the magic number he was r...,18,61
4,59,It's still very useful to know that magic numb...,12,37
5,96,This implementation is also nice if you wish t...,9,23
6,108,"That's not full text searching, it's searching...",6,13
7,109,"That's not full text searching, it's searching...",6,13
8,137,In vim you can open > 1 buffer. :e filename. T...,9,23
9,154,"Sure, but what about a solution using O(1) mem...",8,19


In [37]:
%%time

##############################
#     BigFrames World        #
##############################

import bigframes
pd = bigframes.connect()

df = pd.read_gbq(TABLE).head(MAX_ROWS)[['id', 'text', 'score']]
df.head(10)

CPU times: user 197 ms, sys: 0 ns, total: 197 ms
Wall time: 18.4 s


Unnamed: 0,id,text,score
0,94308208,"@ndtreviv: No, they’re not the same object. `{...",25
1,95766206,Fix your data model so you are not storing num...,6
2,85280004,2017: Even StackExchange stopping support: Dea...,6
3,85287880,"Yes, use the body. The second return parameter...",4
4,12543196,"@DavidHeffernan alright, I updated my answer w...",0
5,113487636,Here is a link to where I finally solved this....,0
6,113518947,"I also need an answer to this, one can hope fo...",0
7,100053673,"Put the 10 dataframes in a list, write a littl...",4
8,100144289,you've tagged `mysql` and `psql` which as far ...,4
9,99960745,without a `.prj` file you just have a collecti...,0


In [38]:
# User simply imports the remote_function decorator and uses it on their udf
from bigframes import remote_function

# Tell the user what needs to be done offline before using BigFrames remote functions
help(remote_function)

Help on function remote_function in module bigframes.remote_function:

remote_function(input_types: 'typing.Sequence[type]', output_type: 'type', session: 'typing.Optional[Session]' = None, bigquery_client: 'typing.Optional[bigquery.Client]' = None, dataset: 'typing.Optional[str]' = None, bigquery_connection: 'typing.Optional[str]' = None, reuse: 'bool' = True)
    Decorator to turn a user defined function into a BigQuery remote function.
    
    Args:
        input_types : list(type)
            List of input data types in the user defined function.
        output_type : type
            Data type of the output in the user defined function.
        session : bigframes.Session, Optional
            BigFrames session to use for getting default project, dataset and
            bigquery connection.
        bigquery_client : google.cloud.bigquery.Client, Optional
            Client to use for BigQuery operations. If this param is not provided
            then bigquery client from the sess

In [39]:
# BigFrames user is a data scientist and may not have privileges to create a BQ
# connector and set it up for invoking a cloud function. They should get such a
# connector created from their cloud admin and use it with BigFrames remote
# functions. If the provided connection name does not exist, BigFrames will try
# to create it on the fly assuming the user has sufficient prileges.
bq_connection_name = 'bigframes-rf-conn'

In [40]:
from time import perf_counter
start = perf_counter()
#-------------------------------------------------------------------------------------

# User defined function
# https://www.codespeedy.com/find-nth-prime-number-in-python/
@remote_function([int], int, session=pd, bigquery_connection=bq_connection_name)
def nth_prime(n):
    prime_numbers = [2,3]
    i=3
    if(0<n<=2):
        return prime_numbers[n-1]
    elif(n>2):
        while (True):
            i+=1
            status = True
            for j in range(2,int(i/2)+1):
                if(i%j==0):
                    status = False
                    break
            if(status==True):
                prime_numbers.append(i)
            if(len(prime_numbers)==n):
                break
        return prime_numbers[n-1]
    else:
        return -1

#-------------------------------------------------------------------------------------
print(f"\nWall time: {(perf_counter()-start):.4f} s")

[INFO][2023-06-23 21:08:20,360][bigframes.remote_function] Cloud function bigframes-ab6845d518afa4603967a525d9b7504d already exists.
[INFO][2023-06-23 21:08:21,807][bigframes.remote_function] Remote function bigframes_ab6845d518afa4603967a525d9b7504d already exists.



Wall time: 3.0511 s


In [41]:
%%time

df = df.assign(n_prime=df['score'].apply(nth_prime))
df.head(10)

CPU times: user 18.6 ms, sys: 0 ns, total: 18.6 ms
Wall time: 20.8 ms


Unnamed: 0,id,text,score,n_prime
0,94308208,"@ndtreviv: No, they’re not the same object. `{...",25,97
1,95766206,Fix your data model so you are not storing num...,6,13
2,85280004,2017: Even StackExchange stopping support: Dea...,6,13
3,85287880,"Yes, use the body. The second return parameter...",4,7
4,12543196,"@DavidHeffernan alright, I updated my answer w...",0,-1
5,113487636,Here is a link to where I finally solved this....,0,-1
6,113518947,"I also need an answer to this, one can hope fo...",0,-1
7,100053673,"Put the 10 dataframes in a list, write a littl...",4,7
8,100144289,you've tagged `mysql` and `psql` which as far ...,4,7
9,99960745,without a `.prj` file you just have a collecti...,0,-1
