# Setup

In [1]:
import bigframes.pandas as bpd

In [2]:
df = bpd.read_gbq("bigquery-public-data.baseball.schedules")[["homeTeamName", "awayTeamName", "duration_minutes"]]
df.peek()

Unnamed: 0,homeTeamName,awayTeamName,duration_minutes
199,Cubs,Padres,193
264,Braves,Phillies,220
379,Orioles,Rockies,194
461,White Sox,Tigers,128
564,Rangers,Twins,194


# Self-contained function

In [3]:
@bpd.remote_function()
def duration_category(duration: int) -> str:
    if duration < 90:
        return "short"
    elif duration < 180:
        return "medium"
    else:
        return "long"

print(f"Created cloud function '{duration_category.bigframes_cloud_function}' and BQ remote function '{duration_category.bigframes_cloud_function}'.")

Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-045b06b95e48dd89b1cb65e120b8c23a' and BQ remote function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-045b06b95e48dd89b1cb65e120b8c23a'.


In [4]:
df1 = df.assign(duration_cat=df["duration_minutes"].apply(duration_category))
df1.peek()

Unnamed: 0,homeTeamName,awayTeamName,duration_minutes,duration_cat
1911,Dodgers,Angels,132,medium
2365,Athletics,Angels,134,medium
1977,Athletics,Angels,139,medium
554,Cubs,Angels,142,medium
654,Astros,Angels,143,medium


# Function referring to variables outside the function body

In [5]:
DURATION_CATEGORY_SHORT = "S"
DURATION_CATEGORY_MEDIUM = "M"
DURATION_CATEGORY_LONG = "L"

In [6]:
@bpd.remote_function()
def duration_category(duration: int) -> str:
    if duration < 90:
        return DURATION_CATEGORY_SHORT
    elif duration < 180:
        return DURATION_CATEGORY_MEDIUM
    else:
        return DURATION_CATEGORY_LONG

print(f"Created cloud function '{duration_category.bigframes_cloud_function}' and BQ remote function '{duration_category.bigframes_cloud_function}'.")

Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-181cf9060d0c48416f6fb523bbf50ff0' and BQ remote function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-181cf9060d0c48416f6fb523bbf50ff0'.


In [7]:
df1 = df.assign(duration_cat=df["duration_minutes"].apply(duration_category))
df1.peek()

Unnamed: 0,homeTeamName,awayTeamName,duration_minutes,duration_cat
1911,Dodgers,Angels,132,M
2365,Athletics,Angels,134,M
1977,Athletics,Angels,139,M
554,Cubs,Angels,142,M
654,Astros,Angels,143,M


# Function referring to imports outside the function body

In [8]:
import math

In [9]:
@bpd.remote_function()
def duration_category(duration: int) -> str:
    duration_hours = math.ceil(duration / 60)
    return f"{duration_hours}h"

print(f"Created cloud function '{duration_category.bigframes_cloud_function}' and BQ remote function '{duration_category.bigframes_cloud_function}'.")

Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-eceecd96cd0ab34e4405c6ea038cedf3' and BQ remote function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-eceecd96cd0ab34e4405c6ea038cedf3'.


In [10]:
df1 = df.assign(duration_cat=df["duration_minutes"].apply(duration_category))
df1.peek()

Unnamed: 0,homeTeamName,awayTeamName,duration_minutes,duration_cat
1911,Dodgers,Angels,132,3h
2365,Athletics,Angels,134,3h
1977,Athletics,Angels,139,3h
554,Cubs,Angels,142,3h
654,Astros,Angels,143,3h


# Function referring to another function outside the function body

In [11]:
import math
def get_hour_ceiling(minutes):
  return math.ceil(minutes / 60)

In [12]:
@bpd.remote_function()
def duration_category(duration: int) -> str:
    duration_hours = get_hour_ceiling(duration)
    return f"{duration_hours} hrs"

print(f"Created cloud function '{duration_category.bigframes_cloud_function}' and BQ remote function '{duration_category.bigframes_cloud_function}'.")

Created cloud function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-a0376c3af8d8c33eaf72ea40e2400cee' and BQ remote function 'projects/bigframes-dev/locations/us-central1/functions/bigframes-a0376c3af8d8c33eaf72ea40e2400cee'.


In [13]:
df1 = df.assign(duration_cat=df["duration_minutes"].apply(duration_category))
df1.peek()

Unnamed: 0,homeTeamName,awayTeamName,duration_minutes,duration_cat
1911,Dodgers,Angels,132,3 hrs
2365,Athletics,Angels,134,3 hrs
1977,Athletics,Angels,139,3 hrs
554,Cubs,Angels,142,3 hrs
654,Astros,Angels,143,3 hrs


# Function requiring external packages

In [14]:
@bpd.remote_function(packages=["cryptography"])
def get_hash(input: str) -> str:
    from cryptography.fernet import Fernet

    # handle missing value
    if input is None:
        input = ""

    key = Fernet.generate_key()
    f = Fernet(key)
    return f.encrypt(input.encode()).decode()

In [15]:
df1 = df.assign(homeTeamNameRedacted=df["homeTeamName"].apply(get_hash))
df1.peek()

Unnamed: 0,homeTeamName,awayTeamName,duration_minutes,homeTeamNameRedacted
641,American League,National League,185,gAAAAABmfKmeq6tGGNQsVBC34KJZrg9OU_ISAzqE78btoZ...
1600,Angels,Astros,198,gAAAAABmfKme0eFTc6EBXs9zOXUfapz30rl7lF7YuBk8iG...
1173,Angels,Astros,158,gAAAAABmfKmeFXUBH798eXRxxhHK8WBShcIfmw1sirwbvv...
1855,Angels,Astros,297,gAAAAABmfKme3Q8godh2tXXpzkJ6e5x-V1yRy3O23gPwn2...
557,Angels,Astros,166,gAAAAABmfKme5rxvpWEVwfeetzRVgHHCLVGf4FjtK1ypP2...
