# Demo to Show DataFrame Operations

In [1]:
# On the instance where you are running jupyter,
# authenticate with gcloud first:
#
#     gcloud auth application-default login

import bigframes.pandas as bpd

bpd.options.bigquery.location = "US"

In [2]:
df = bpd.read_gbq("bigquery-public-data.baseball.schedules")

HTML(value='Query job ccb31707-38d2-4d93-8502-e39352f322a3 is RUNNING. <a target="_blank" href="https://consol…

HTML(value='Query job a71b5936-09b6-4e35-afeb-11fca4805832 is RUNNING. <a target="_blank" href="https://consol…

### Select a subset of the DF

In [3]:
df = df[["gameId", "year", "homeTeamName", "awayTeamName", "duration_minutes"]]

In [4]:
df
# Here starts real execution, may take a while

HTML(value='Query job fa4bad97-52d0-4117-85f4-9bb911520164 is DONE. 0 Bytes processed. <a target="_blank" href…

HTML(value='Query job fdc644e2-c008-485a-90b2-dc64e5c81f3b is DONE. 193.8 kB processed. <a target="_blank" hre…

Unnamed: 0,gameId,year,homeTeamName,awayTeamName,duration_minutes
0,e14b6493-9e7f-404f-840a-8a680cc364bf,2016,Marlins,Cubs,187
1,1f32b347-cbcb-4c31-a145-0e685306d168,2016,Marlins,Cubs,189
2,0c2292d1-7398-48be-bf8e-b41dad5e1a43,2016,Braves,Cubs,165
3,8fbec734-a15a-42ab-8d51-60790de7750b,2016,Braves,Cubs,222
4,89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd,2016,Phillies,Cubs,164
5,6a83e76c-dc0d-4924-9d3d-a2e7e0ab5b52,2016,Diamondbacks,Cubs,201
6,76ea8662-c7e6-4c38-8f2a-efe373e428ce,2016,Athletics,Cubs,173
7,66fad23d-6e89-4f99-be29-d49b6e94f95d,2016,Athletics,Cubs,176
8,d977367c-cf0c-4687-95a0-eb4542efcb01,2016,Rockies,Cubs,180
9,a87070ff-1084-43ca-a7ba-69278f93ecba,2016,Cardinals,Cubs,157


In [5]:
df.shape

(2431, 5)

### Retrieve properties of the DF.

In [6]:
df.dtypes

gameId              string[pyarrow]
year                          Int64
homeTeamName        string[pyarrow]
awayTeamName        string[pyarrow]
duration_minutes              Int64
dtype: object

In [7]:
df.columns

Index(['gameId', 'year', 'homeTeamName', 'awayTeamName', 'duration_minutes'], dtype='object')

### Add a new column

In [8]:
df = df.assign(title=df['homeTeamName'] + " vs " + df['awayTeamName'])
df

HTML(value='Query job c4b8deed-0c47-4ce7-b013-d8b24997851a is DONE. 0 Bytes processed. <a target="_blank" href…

HTML(value='Query job e6830838-99ae-4162-a47f-2185bb9c1f27 is DONE. 193.8 kB processed. <a target="_blank" hre…

Unnamed: 0,gameId,year,homeTeamName,awayTeamName,duration_minutes,title
0,e14b6493-9e7f-404f-840a-8a680cc364bf,2016,Marlins,Cubs,187,Marlins vs Cubs
1,1f32b347-cbcb-4c31-a145-0e685306d168,2016,Marlins,Cubs,189,Marlins vs Cubs
2,0c2292d1-7398-48be-bf8e-b41dad5e1a43,2016,Braves,Cubs,165,Braves vs Cubs
3,8fbec734-a15a-42ab-8d51-60790de7750b,2016,Braves,Cubs,222,Braves vs Cubs
4,89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd,2016,Phillies,Cubs,164,Phillies vs Cubs
5,6a83e76c-dc0d-4924-9d3d-a2e7e0ab5b52,2016,Diamondbacks,Cubs,201,Diamondbacks vs Cubs
6,76ea8662-c7e6-4c38-8f2a-efe373e428ce,2016,Athletics,Cubs,173,Athletics vs Cubs
7,66fad23d-6e89-4f99-be29-d49b6e94f95d,2016,Athletics,Cubs,176,Athletics vs Cubs
8,d977367c-cf0c-4687-95a0-eb4542efcb01,2016,Rockies,Cubs,180,Rockies vs Cubs
9,a87070ff-1084-43ca-a7ba-69278f93ecba,2016,Cardinals,Cubs,157,Cardinals vs Cubs


### Manipulate the column

In [9]:
df = df.rename(columns={"title": "headline"})
df

HTML(value='Query job 2062db30-30ae-42cf-8afa-9b8f3493fd98 is DONE. 0 Bytes processed. <a target="_blank" href…

HTML(value='Query job 9c36c84f-e672-46e1-a134-7ef2c2e60b4e is DONE. 0 Bytes processed. <a target="_blank" href…

Unnamed: 0,gameId,year,homeTeamName,awayTeamName,duration_minutes,headline
0,e14b6493-9e7f-404f-840a-8a680cc364bf,2016,Marlins,Cubs,187,Marlins vs Cubs
1,1f32b347-cbcb-4c31-a145-0e685306d168,2016,Marlins,Cubs,189,Marlins vs Cubs
2,0c2292d1-7398-48be-bf8e-b41dad5e1a43,2016,Braves,Cubs,165,Braves vs Cubs
3,8fbec734-a15a-42ab-8d51-60790de7750b,2016,Braves,Cubs,222,Braves vs Cubs
4,89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd,2016,Phillies,Cubs,164,Phillies vs Cubs
5,6a83e76c-dc0d-4924-9d3d-a2e7e0ab5b52,2016,Diamondbacks,Cubs,201,Diamondbacks vs Cubs
6,76ea8662-c7e6-4c38-8f2a-efe373e428ce,2016,Athletics,Cubs,173,Athletics vs Cubs
7,66fad23d-6e89-4f99-be29-d49b6e94f95d,2016,Athletics,Cubs,176,Athletics vs Cubs
8,d977367c-cf0c-4687-95a0-eb4542efcb01,2016,Rockies,Cubs,180,Rockies vs Cubs
9,a87070ff-1084-43ca-a7ba-69278f93ecba,2016,Cardinals,Cubs,157,Cardinals vs Cubs


In [10]:
df = df.drop(columns="headline")

In [11]:
df

HTML(value='Query job f0ed19be-b1f5-4333-a51f-3c7872a2bbc6 is DONE. 0 Bytes processed. <a target="_blank" href…

HTML(value='Query job 863e72d3-b421-4e53-98bb-9bf634fe9a71 is DONE. 193.8 kB processed. <a target="_blank" hre…

Unnamed: 0,gameId,year,homeTeamName,awayTeamName,duration_minutes
0,e14b6493-9e7f-404f-840a-8a680cc364bf,2016,Marlins,Cubs,187
1,1f32b347-cbcb-4c31-a145-0e685306d168,2016,Marlins,Cubs,189
2,0c2292d1-7398-48be-bf8e-b41dad5e1a43,2016,Braves,Cubs,165
3,8fbec734-a15a-42ab-8d51-60790de7750b,2016,Braves,Cubs,222
4,89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd,2016,Phillies,Cubs,164
5,6a83e76c-dc0d-4924-9d3d-a2e7e0ab5b52,2016,Diamondbacks,Cubs,201
6,76ea8662-c7e6-4c38-8f2a-efe373e428ce,2016,Athletics,Cubs,173
7,66fad23d-6e89-4f99-be29-d49b6e94f95d,2016,Athletics,Cubs,176
8,d977367c-cf0c-4687-95a0-eb4542efcb01,2016,Rockies,Cubs,180
9,a87070ff-1084-43ca-a7ba-69278f93ecba,2016,Cardinals,Cubs,157


### Drop Nan values

In [12]:
df = df.dropna()
df

HTML(value='Query job e387be31-99fc-46a9-9de7-3bb83ff1f4fe is DONE. 174.4 kB processed. <a target="_blank" hre…

HTML(value='Query job af23a9a2-151d-469a-a5c7-588ea60d1602 is DONE. 193.8 kB processed. <a target="_blank" hre…

Unnamed: 0,gameId,year,homeTeamName,awayTeamName,duration_minutes
0,e14b6493-9e7f-404f-840a-8a680cc364bf,2016,Marlins,Cubs,187
1,1f32b347-cbcb-4c31-a145-0e685306d168,2016,Marlins,Cubs,189
2,0c2292d1-7398-48be-bf8e-b41dad5e1a43,2016,Braves,Cubs,165
3,8fbec734-a15a-42ab-8d51-60790de7750b,2016,Braves,Cubs,222
4,89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd,2016,Phillies,Cubs,164
5,6a83e76c-dc0d-4924-9d3d-a2e7e0ab5b52,2016,Diamondbacks,Cubs,201
6,76ea8662-c7e6-4c38-8f2a-efe373e428ce,2016,Athletics,Cubs,173
7,66fad23d-6e89-4f99-be29-d49b6e94f95d,2016,Athletics,Cubs,176
8,d977367c-cf0c-4687-95a0-eb4542efcb01,2016,Rockies,Cubs,180
9,a87070ff-1084-43ca-a7ba-69278f93ecba,2016,Cardinals,Cubs,157


### Join two DFs

In [13]:
df1 = df[["gameId", "homeTeamName"]]
df1

HTML(value='Query job b14b9796-f94d-48c0-a477-13f6b865e11d is DONE. 174.4 kB processed. <a target="_blank" hre…

HTML(value='Query job 20f025c3-4d84-49e9-9fbd-d77eeb2c6e04 is RUNNING. <a target="_blank" href="https://consol…

Unnamed: 0,gameId,homeTeamName
0,e14b6493-9e7f-404f-840a-8a680cc364bf,Marlins
1,1f32b347-cbcb-4c31-a145-0e685306d168,Marlins
2,0c2292d1-7398-48be-bf8e-b41dad5e1a43,Braves
3,8fbec734-a15a-42ab-8d51-60790de7750b,Braves
4,89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd,Phillies
5,6a83e76c-dc0d-4924-9d3d-a2e7e0ab5b52,Diamondbacks
6,76ea8662-c7e6-4c38-8f2a-efe373e428ce,Athletics
7,66fad23d-6e89-4f99-be29-d49b6e94f95d,Athletics
8,d977367c-cf0c-4687-95a0-eb4542efcb01,Rockies
9,a87070ff-1084-43ca-a7ba-69278f93ecba,Cardinals


In [14]:
df2 = df[["gameId", "awayTeamName"]].head(2)
df2

HTML(value='Query job fbd5deef-4c7f-4345-ab6c-28c3e24bd918 is DONE. 193.8 kB processed. <a target="_blank" hre…

HTML(value='Query job ba580b64-ca65-4245-b17f-12fd382b2e2b is DONE. 193.8 kB processed. <a target="_blank" hre…

Unnamed: 0,gameId,awayTeamName
0,e14b6493-9e7f-404f-840a-8a680cc364bf,Cubs
1,1f32b347-cbcb-4c31-a145-0e685306d168,Cubs


In [15]:
df1.merge(df2, on="gameId", how="inner")

HTML(value='Query job 3f54256f-7189-400b-8d47-5ce1f6fe92c0 is DONE. 193.8 kB processed. <a target="_blank" hre…

HTML(value='Query job 57211b59-73c2-42d6-88bd-a614a8baf779 is DONE. 193.8 kB processed. <a target="_blank" hre…

Unnamed: 0,gameId,homeTeamName,awayTeamName
0,e14b6493-9e7f-404f-840a-8a680cc364bf,Marlins,Cubs
1,1f32b347-cbcb-4c31-a145-0e685306d168,Marlins,Cubs


In [16]:
df1.merge(df2, on="gameId", how="outer")

HTML(value='Query job 07ee6beb-b805-4ba9-8cb2-174d4e62ddfb is DONE. 193.8 kB processed. <a target="_blank" hre…

HTML(value='Query job ced45fc3-cfdb-4cd0-96ef-16a29d8d8f0b is DONE. 193.8 kB processed. <a target="_blank" hre…

Unnamed: 0,gameId,homeTeamName,awayTeamName
0,e14b6493-9e7f-404f-840a-8a680cc364bf,Marlins,Cubs
1,1f32b347-cbcb-4c31-a145-0e685306d168,Marlins,Cubs
2,0c2292d1-7398-48be-bf8e-b41dad5e1a43,Braves,
3,8fbec734-a15a-42ab-8d51-60790de7750b,Braves,
4,89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd,Phillies,
5,6a83e76c-dc0d-4924-9d3d-a2e7e0ab5b52,Diamondbacks,
6,76ea8662-c7e6-4c38-8f2a-efe373e428ce,Athletics,
7,66fad23d-6e89-4f99-be29-d49b6e94f95d,Athletics,
8,d977367c-cf0c-4687-95a0-eb4542efcb01,Rockies,
9,a87070ff-1084-43ca-a7ba-69278f93ecba,Cardinals,


In [17]:
df1.merge(df2, on="gameId", how="left")

HTML(value='Query job 60335c33-acc9-4a4a-9e08-190fe67ad60e is DONE. 193.8 kB processed. <a target="_blank" hre…

HTML(value='Query job 8c4e016f-429d-4591-8956-56fb18676334 is DONE. 193.8 kB processed. <a target="_blank" hre…

Unnamed: 0,gameId,homeTeamName,awayTeamName
0,e14b6493-9e7f-404f-840a-8a680cc364bf,Marlins,Cubs
1,1f32b347-cbcb-4c31-a145-0e685306d168,Marlins,Cubs
2,0c2292d1-7398-48be-bf8e-b41dad5e1a43,Braves,
3,8fbec734-a15a-42ab-8d51-60790de7750b,Braves,
4,89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd,Phillies,
5,6a83e76c-dc0d-4924-9d3d-a2e7e0ab5b52,Diamondbacks,
6,76ea8662-c7e6-4c38-8f2a-efe373e428ce,Athletics,
7,66fad23d-6e89-4f99-be29-d49b6e94f95d,Athletics,
8,d977367c-cf0c-4687-95a0-eb4542efcb01,Rockies,
9,a87070ff-1084-43ca-a7ba-69278f93ecba,Cardinals,


In [18]:
df1.merge(df2, on="gameId", how="right")

HTML(value='Query job 948a3d0b-1c3d-479b-b54f-9a2b2062380e is DONE. 193.8 kB processed. <a target="_blank" hre…

HTML(value='Query job 20d2a8bb-a876-4729-a439-8c8bbf591051 is DONE. 193.8 kB processed. <a target="_blank" hre…

Unnamed: 0,gameId,homeTeamName,awayTeamName
0,e14b6493-9e7f-404f-840a-8a680cc364bf,Marlins,Cubs
1,1f32b347-cbcb-4c31-a145-0e685306d168,Marlins,Cubs


### Concat two DFs

In [19]:
bpd.concat([df, df])

HTML(value='Query job ea340371-7874-4590-bb5e-f747f81397de is DONE. 174.4 kB processed. <a target="_blank" hre…

HTML(value='Query job 5c35116b-1c4a-4cc1-9ddd-172083d09490 is DONE. 193.8 kB processed. <a target="_blank" hre…

Unnamed: 0,gameId,year,homeTeamName,awayTeamName,duration_minutes
0,e14b6493-9e7f-404f-840a-8a680cc364bf,2016,Marlins,Cubs,187
1,1f32b347-cbcb-4c31-a145-0e685306d168,2016,Marlins,Cubs,189
2,0c2292d1-7398-48be-bf8e-b41dad5e1a43,2016,Braves,Cubs,165
3,8fbec734-a15a-42ab-8d51-60790de7750b,2016,Braves,Cubs,222
4,89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd,2016,Phillies,Cubs,164
5,6a83e76c-dc0d-4924-9d3d-a2e7e0ab5b52,2016,Diamondbacks,Cubs,201
6,76ea8662-c7e6-4c38-8f2a-efe373e428ce,2016,Athletics,Cubs,173
7,66fad23d-6e89-4f99-be29-d49b6e94f95d,2016,Athletics,Cubs,176
8,d977367c-cf0c-4687-95a0-eb4542efcb01,2016,Rockies,Cubs,180
9,a87070ff-1084-43ca-a7ba-69278f93ecba,2016,Cardinals,Cubs,157


### Access column through property

In [20]:
df.homeTeamName

HTML(value='Query job 79bc2f65-5c7e-470d-b30e-a959838f0ed9 is DONE. 174.4 kB processed. <a target="_blank" hre…

HTML(value='Query job 9de0da3d-b43c-4381-8ecb-5b8c6d9d2c8b is DONE. 193.8 kB processed. <a target="_blank" hre…

0          Marlins
1          Marlins
2           Braves
3           Braves
4         Phillies
5     Diamondbacks
6        Athletics
7        Athletics
8          Rockies
9        Cardinals
10       Cardinals
11       Cardinals
12         Pirates
13         Pirates
14          Giants
15            Reds
16            Reds
17            Reds
18            Reds
19          Padres
20       Nationals
21         Brewers
22         Brewers
23         Brewers
24          Astros
Name: homeTeamName, dtype: string

### Retrieve SQL

In [21]:
print(df1.merge(df2, on="gameId", how="inner").sql[0])

W


### Special Column Names

In [22]:
df.rename(columns={"homeTeamName": "HOME    TEAM"})

HTML(value='Query job 15cdbf31-e68a-41f0-9c5b-ea4ce49345f0 is DONE. 0 Bytes processed. <a target="_blank" href…

HTML(value='Query job 1e56a960-3fcd-421a-8500-bc1c099ae7a1 is DONE. 0 Bytes processed. <a target="_blank" href…

Unnamed: 0,gameId,year,HOME TEAM,awayTeamName,duration_minutes
0,e14b6493-9e7f-404f-840a-8a680cc364bf,2016,Marlins,Cubs,187
1,1f32b347-cbcb-4c31-a145-0e685306d168,2016,Marlins,Cubs,189
2,0c2292d1-7398-48be-bf8e-b41dad5e1a43,2016,Braves,Cubs,165
3,8fbec734-a15a-42ab-8d51-60790de7750b,2016,Braves,Cubs,222
4,89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd,2016,Phillies,Cubs,164
5,6a83e76c-dc0d-4924-9d3d-a2e7e0ab5b52,2016,Diamondbacks,Cubs,201
6,76ea8662-c7e6-4c38-8f2a-efe373e428ce,2016,Athletics,Cubs,173
7,66fad23d-6e89-4f99-be29-d49b6e94f95d,2016,Athletics,Cubs,176
8,d977367c-cf0c-4687-95a0-eb4542efcb01,2016,Rockies,Cubs,180
9,a87070ff-1084-43ca-a7ba-69278f93ecba,2016,Cardinals,Cubs,157


In [23]:
df.rename(columns={"homeTeamName": "homeTeam!@#$%col"})

HTML(value='Query job f21ef830-99b1-4fce-ab9a-378e12a04587 is DONE. 0 Bytes processed. <a target="_blank" href…

HTML(value='Query job 88f63cc1-9691-4e6c-8acf-650f31ab8560 is DONE. 0 Bytes processed. <a target="_blank" href…

Unnamed: 0,gameId,year,homeTeam!@#$%col,awayTeamName,duration_minutes
0,e14b6493-9e7f-404f-840a-8a680cc364bf,2016,Marlins,Cubs,187
1,1f32b347-cbcb-4c31-a145-0e685306d168,2016,Marlins,Cubs,189
2,0c2292d1-7398-48be-bf8e-b41dad5e1a43,2016,Braves,Cubs,165
3,8fbec734-a15a-42ab-8d51-60790de7750b,2016,Braves,Cubs,222
4,89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd,2016,Phillies,Cubs,164
5,6a83e76c-dc0d-4924-9d3d-a2e7e0ab5b52,2016,Diamondbacks,Cubs,201
6,76ea8662-c7e6-4c38-8f2a-efe373e428ce,2016,Athletics,Cubs,173
7,66fad23d-6e89-4f99-be29-d49b6e94f95d,2016,Athletics,Cubs,176
8,d977367c-cf0c-4687-95a0-eb4542efcb01,2016,Rockies,Cubs,180
9,a87070ff-1084-43ca-a7ba-69278f93ecba,2016,Cardinals,Cubs,157


In [24]:
df3 = df.rename(columns={"homeTeamName": "team", "awayTeamName": "team"})
df3

HTML(value='Query job 6f6e1d12-4202-434a-908a-3d0b34e70656 is DONE. 0 Bytes processed. <a target="_blank" href…

HTML(value='Query job 05ec231e-a7f0-4e41-ad60-5d8136a2e148 is DONE. 0 Bytes processed. <a target="_blank" href…

Unnamed: 0,gameId,year,team,team.1,duration_minutes
0,e14b6493-9e7f-404f-840a-8a680cc364bf,2016,Marlins,Cubs,187
1,1f32b347-cbcb-4c31-a145-0e685306d168,2016,Marlins,Cubs,189
2,0c2292d1-7398-48be-bf8e-b41dad5e1a43,2016,Braves,Cubs,165
3,8fbec734-a15a-42ab-8d51-60790de7750b,2016,Braves,Cubs,222
4,89e514d5-fbf5-4b9d-bdac-6ca45bfd18dd,2016,Phillies,Cubs,164
5,6a83e76c-dc0d-4924-9d3d-a2e7e0ab5b52,2016,Diamondbacks,Cubs,201
6,76ea8662-c7e6-4c38-8f2a-efe373e428ce,2016,Athletics,Cubs,173
7,66fad23d-6e89-4f99-be29-d49b6e94f95d,2016,Athletics,Cubs,176
8,d977367c-cf0c-4687-95a0-eb4542efcb01,2016,Rockies,Cubs,180
9,a87070ff-1084-43ca-a7ba-69278f93ecba,2016,Cardinals,Cubs,157


In [25]:
df3["team"]

HTML(value='Query job f4b90f3c-381e-470d-8416-54f31f1fbb3a is DONE. 174.4 kB processed. <a target="_blank" hre…

HTML(value='Query job 5cd5e48d-8b02-4500-94cd-cbd2ee91957e is DONE. 193.8 kB processed. <a target="_blank" hre…

Unnamed: 0,team,team.1
0,Marlins,Cubs
1,Marlins,Cubs
2,Braves,Cubs
3,Braves,Cubs
4,Phillies,Cubs
5,Diamondbacks,Cubs
6,Athletics,Cubs
7,Athletics,Cubs
8,Rockies,Cubs
9,Cardinals,Cubs


### Binary Operation

In [26]:
df4 = df[["year", "duration_minutes"]]
df4

HTML(value='Query job 6b1cf632-7764-49ba-bd5d-cdf31c47e430 is DONE. 174.4 kB processed. <a target="_blank" hre…

HTML(value='Query job baa31010-2426-4ea5-9527-8f55473a3f41 is DONE. 193.8 kB processed. <a target="_blank" hre…

Unnamed: 0,year,duration_minutes
0,2016,187
1,2016,189
2,2016,165
3,2016,222
4,2016,164
5,2016,201
6,2016,173
7,2016,176
8,2016,180
9,2016,157


In [27]:
df4 + 1

HTML(value='Query job 62ffb914-e1fb-4b12-adb5-09b431e06acf is DONE. 174.4 kB processed. <a target="_blank" hre…

HTML(value='Query job 2eaceff3-b00c-42d0-883f-fbe85a70f49b is DONE. 193.8 kB processed. <a target="_blank" hre…

Unnamed: 0,year,duration_minutes
0,2017,188
1,2017,190
2,2017,166
3,2017,223
4,2017,165
5,2017,202
6,2017,174
7,2017,177
8,2017,181
9,2017,158


### Download the result as (in-memory) pandas DataFrame

In [28]:
dfp = df4.to_pandas()
dfp

HTML(value='Query job f3b7ff8a-ffdc-4f4c-91d3-6c2c702c373d is DONE. 193.8 kB processed. <a target="_blank" hre…

Unnamed: 0,year,duration_minutes
0,2016,187
1,2016,189
2,2016,165
3,2016,222
4,2016,164
...,...,...
2426,2016,156
2427,2016,185
2428,2016,243
2429,2016,184
