# BigFrames Remote Function
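Author: @johanesalxd

Examples of turning Python functions into BigQuery remote functions with `bpd.remote_function` and applying them to a BigQuery DataFrames column.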

## Setup

In [1]:
import bigframes.pandas as bpd

In [2]:
# Load the public baseball schedule table and keep only the columns the
# examples below work with.
schedule_columns = ["homeTeamName", "awayTeamName", "duration_minutes"]
df = bpd.read_gbq("bigquery-public-data.baseball.schedules")[schedule_columns]
df.peek()



Unnamed: 0,homeTeamName,awayTeamName,duration_minutes
54,Astros,Orioles,183
256,Nationals,Mets,168
266,Rays,Astros,199
485,White Sox,Rays,177
765,Blue Jays,Rays,190


## Self-contained function

In [3]:
@bpd.remote_function(reuse=False)
def duration_category(duration_minutes: int) -> str:
    """Classify a game length in minutes as "short", "medium" or "long".

    Under 90 minutes is "short", 90 up to (but not including) 180 minutes is
    "medium", and everything else is "long".
    """
    if duration_minutes < 90:
        return "short"
    if duration_minutes < 180:
        return "medium"
    return "long"


print(
    f"Created cloud function '{duration_category.bigframes_cloud_function}' and BQ remote function '{duration_category.bigframes_remote_function}'."
)

Created cloud function 'projects/johanesa-playground-326616/locations/us-central1/functions/bigframes-session04c756-0cb2be923b0cfecc667c89caca408421-zjjb' and BQ remote function 'johanesa-playground-326616._c245951eb0f07274ff55b0c945510c5e04b96b5a.bigframes_session04c756_0cb2be923b0cfecc667c89caca408421_zjjb'.


In [4]:
# Run the remote function over every duration and attach the result as a new column.
category_series = df["duration_minutes"].apply(duration_category)
df1 = df.assign(duration_cat=category_series)
df1.peek()

Unnamed: 0,homeTeamName,awayTeamName,duration_minutes,duration_cat
2410,Nationals,Cubs,257,long
2101,Reds,Cubs,283,long
930,Brewers,Cubs,300,long
682,Pirates,Cubs,75,short
2339,Brewers,Cubs,130,medium


## Function referring to variables outside the function body

In [None]:
# Category labels defined at module level; the remote function in the next
# cell refers to them from inside its body.
(
    DURATION_CATEGORY_SHORT,
    DURATION_CATEGORY_MEDIUM,
    DURATION_CATEGORY_LONG,
) = ("S", "M", "L")

In [None]:
@bpd.remote_function(reuse=False)
def duration_category(duration_minutes: int) -> str:
    """Map a game length in minutes to one of the DURATION_CATEGORY_* labels.

    The labels are module-level constants defined outside this function.
    Under 90 minutes maps to the SHORT label, 90 up to (but not including)
    180 minutes to MEDIUM, and everything else to LONG.
    """
    if duration_minutes < 90:
        return DURATION_CATEGORY_SHORT
    if duration_minutes < 180:
        return DURATION_CATEGORY_MEDIUM
    return DURATION_CATEGORY_LONG


print(
    f"Created cloud function '{duration_category.bigframes_cloud_function}' and BQ remote function '{duration_category.bigframes_remote_function}'."
)

Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session54c8b0-4191f0fce98d46cc09359de47e203236-e009' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session54c8b0_4191f0fce98d46cc09359de47e203236_e009'.


In [None]:
# Apply the constant-based remote function and attach the labels as a new column.
category_series = df["duration_minutes"].apply(duration_category)
df1 = df.assign(duration_cat=category_series)
df1.peek()

Unnamed: 0,homeTeamName,awayTeamName,duration_minutes,duration_cat
1911,Dodgers,Angels,132,M
2365,Athletics,Angels,134,M
1977,Athletics,Angels,139,M
554,Cubs,Angels,142,M
654,Astros,Angels,143,M


## Function referring to imports (built-in) outside the function body

In [None]:
import math as mymath

In [None]:
@bpd.remote_function(reuse=False)
def duration_category(duration_minutes: int) -> str:
    """Return the game length rounded up to whole hours, formatted like "3h".

    Uses the `mymath` alias imported outside the function body.
    """
    fractional_hours = duration_minutes / 60
    duration_hours = mymath.ceil(fractional_hours)
    return f"{duration_hours}h"


print(
    f"Created cloud function '{duration_category.bigframes_cloud_function}' and BQ remote function '{duration_category.bigframes_remote_function}'."
)

Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session54c8b0-cf31fc2d2c7fe111afa5526f5a9cdf06-gmmo' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session54c8b0_cf31fc2d2c7fe111afa5526f5a9cdf06_gmmo'.


In [None]:
# Apply the hour-rounding remote function and attach the result as a new column.
category_series = df["duration_minutes"].apply(duration_category)
df1 = df.assign(duration_cat=category_series)
df1.peek()

Unnamed: 0,homeTeamName,awayTeamName,duration_minutes,duration_cat
1911,Dodgers,Angels,132,3h
2365,Athletics,Angels,134,3h
1977,Athletics,Angels,139,3h
554,Cubs,Angels,142,3h
654,Astros,Angels,143,3h


## Function referring to another function outside the function body

In [None]:
import math


def get_minutes_in_hour():
    """Return the number of minutes in one hour."""
    minutes_per_hour = 60
    return minutes_per_hour


def get_hour_ceiling(minutes):
    """Return `minutes` converted to hours and rounded up to a whole number."""
    hours = minutes / get_minutes_in_hour()
    return math.ceil(hours)

In [None]:
@bpd.remote_function(reuse=False)
def duration_category(duration_minutes: int) -> str:
    """Return the game length in whole hours, formatted like "3 hrs".

    The hour count comes from `get_hour_ceiling`, a plain Python function
    defined outside this function body.
    """
    duration_hours = get_hour_ceiling(duration_minutes)
    return f"{duration_hours} hrs"


# Report the names of the deployed cloud function and BigQuery remote function.
print(
    f"Created cloud function '{duration_category.bigframes_cloud_function}' and BQ remote function '{duration_category.bigframes_remote_function}'."
)

Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-session54c8b0-3c03836c2044bf625d02e25ccdbfe101-k1m4' and BQ remote function 'bigframes-dev._1b6c31ff1bcd5d2f6d86833cf8268317f1b12d57.bigframes_session54c8b0_3c03836c2044bf625d02e25ccdbfe101_k1m4'.


In [None]:
# Apply the helper-based remote function and attach the result as a new column.
category_series = df["duration_minutes"].apply(duration_category)
df1 = df.assign(duration_cat=category_series)
df1.peek()

Unnamed: 0,homeTeamName,awayTeamName,duration_minutes,duration_cat
1911,Dodgers,Angels,132,3 hrs
2365,Athletics,Angels,134,3 hrs
1977,Athletics,Angels,139,3 hrs
554,Cubs,Angels,142,3 hrs
654,Astros,Angels,143,3 hrs


## Function requiring external packages

In [None]:
@bpd.remote_function(reuse=False, packages=["cryptography"])
def get_hash(input: str) -> str:
    """Return `input` encrypted with Fernet, as a text token.

    A new key is generated on every call and is never returned or stored,
    so the token serves only as a redacted stand-in for the original value.
    A missing value (None) is treated as the empty string.
    """
    # Imported inside the function body; the package itself is requested via
    # `packages=["cryptography"]` in the decorator.
    from cryptography.fernet import Fernet

    # handle missing value
    plaintext = "" if input is None else input

    cipher = Fernet(Fernet.generate_key())
    token = cipher.encrypt(plaintext.encode())
    return token.decode()

In [None]:
# Encrypt every home team name remotely and attach the tokens as a new column.
redacted_names = df["homeTeamName"].apply(get_hash)
df1 = df.assign(homeTeamNameRedacted=redacted_names)
df1.peek()

Unnamed: 0,homeTeamName,awayTeamName,duration_minutes,homeTeamNameRedacted
641,American League,National League,185,gAAAAABmo0n2I391cbYwIYeg8lyJq1MSFZatrtpvuUD5v-...
349,Angels,Astros,187,gAAAAABmo0n2pX-siRwl2tIZA4m--swndC_b7vgGXrqSNM...
2349,Angels,Astros,160,gAAAAABmo0n28Q9RwH62HvYRhTDpQ9lo8c6G8F5bnn7wgF...
557,Angels,Astros,166,gAAAAABmo0n2YlwHlSGQ0_XvXd-QVBtB_Lq2zUifu7vKhg...
220,Angels,Astros,162,gAAAAABmo0n2l8HMSGKYizxfEmRvGQy96mrjwx734-Rl_Z...


## Function referring to imports (third-party) outside the function body

In [5]:
import datetime as dt
import humanize

In [6]:
@bpd.remote_function(reuse=False, packages=["humanize"])
def duration_category(duration_minutes: int) -> str:
    """Return a human-readable description of the game length, e.g. "4 hours".

    Uses the `dt` and `humanize` modules imported outside the function body;
    `humanize` is also requested via `packages=["humanize"]` in the decorator.
    """
    elapsed = dt.timedelta(minutes=duration_minutes)
    return humanize.naturaldelta(elapsed)


print(
    f"Created cloud function '{duration_category.bigframes_cloud_function}' and BQ remote function '{duration_category.bigframes_remote_function}'."
)

Created cloud function 'projects/johanesa-playground-326616/locations/us-central1/functions/bigframes-session04c756-02c432a4d4c2870a2d3da183d306aec1-wjgk' and BQ remote function 'johanesa-playground-326616._c245951eb0f07274ff55b0c945510c5e04b96b5a.bigframes_session04c756_02c432a4d4c2870a2d3da183d306aec1_wjgk'.


In [7]:
# Apply the humanize-based remote function and attach the result as a new column.
category_series = df["duration_minutes"].apply(duration_category)
df1 = df.assign(duration_cat=category_series)
df1.peek()

Unnamed: 0,homeTeamName,awayTeamName,duration_minutes,duration_cat
2410,Nationals,Cubs,257,4 hours
2101,Reds,Cubs,283,4 hours
930,Brewers,Cubs,300,5 hours
682,Pirates,Cubs,75,an hour
2339,Brewers,Cubs,130,2 hours
