# Demo to Show DataFrame Operations

In [1]:
# Before running this notebook, authenticate with gcloud on the machine
# that hosts the Jupyter server:
#
#     gcloud auth application-default login

import bigframes as bf

# Connection object used below to load the source table.
conn = bf.connect()

In [2]:
# Load the `scalars` table through the connection as a DataFrame.
df = conn.read_gbq("garrettwu-test-project-2.bigframes.scalars")

### Select a subset of the DF

In [3]:
# Keep only the five columns used in the rest of the demo.
selected_columns = ["bool_col", "int64_col", "int64_too", "rowindex", "string_col"]
df = df[selected_columns]

In [4]:
# Displaying the DataFrame is where real execution starts,
# so this cell may take a while.
df

   bool_col   int64_col  int64_too  rowindex     string_col
0      <NA>        <NA>          1         3           None
1     False  -987654321          1         1          こんにちは
2      True   123456789          0         0  Hello, World!
3      True      314159          0         2   ¡Hola Mundo!

[4 rows x 5 columns]

In [5]:
# (number of rows, number of columns)
df.shape

(4, 5)

### Retrieve properties of the DF

In [6]:
# Data type of each column.
df.dtypes

bool_col      boolean
int64_col       Int64
int64_too       Int64
rowindex        Int64
string_col     string
dtype: object

In [7]:
# Column labels of the DataFrame.
df.columns

Index(['bool_col', 'int64_col', 'int64_too', 'rowindex', 'string_col'], dtype='object')

### Add a new column

In [8]:
# Derive int64_col3 as the element-wise sum of two existing integer columns.
column_sum = df["int64_col"] + df["int64_too"]
df = df.assign(int64_col3=column_sum)
df

   bool_col   int64_col  int64_too  rowindex     string_col  int64_col3
0      <NA>        <NA>          1         3           None        <NA>
1     False  -987654321          1         1          こんにちは  -987654320
2      True   123456789          0         0  Hello, World!   123456789
3      True      314159          0         2   ¡Hola Mundo!      314159

[4 rows x 6 columns]

### Rename and drop a column

In [9]:
# Rename the derived column from int64_col3 to int_new.
df = df.rename(columns={"int64_col3": "int_new"})
df

   bool_col   int64_col  int64_too  rowindex     string_col     int_new
0      <NA>        <NA>          1         3           None        <NA>
1     False  -987654321          1         1          こんにちは  -987654320
2      True   123456789          0         0  Hello, World!   123456789
3      True      314159          0         2   ¡Hola Mundo!      314159

[4 rows x 6 columns]

In [10]:
# Remove the int_new column again.
df = df.drop(columns="int_new")

In [11]:
# Show the DataFrame after the drop.
df

   bool_col   int64_col  int64_too  rowindex     string_col
0      <NA>        <NA>          1         3           None
1     False  -987654321          1         1          こんにちは
2      True   123456789          0         0  Hello, World!
3      True      314159          0         2   ¡Hola Mundo!

[4 rows x 5 columns]

### Drop NA values

In [12]:
# Remove rows that contain missing values; here the row holding
# <NA>/None entries is dropped, leaving three rows.
df = df.dropna()
df

   bool_col   int64_col  int64_too  rowindex     string_col
0     False  -987654321          1         1          こんにちは
1      True   123456789          0         0  Hello, World!
2      True      314159          0         2   ¡Hola Mundo!

[3 rows x 5 columns]

### Join two DFs

In [13]:
# Left-hand side of the joins below: the key column plus bool_col.
# Columns are selected with a list of labels (df[[...]]), matching the other
# selections in this notebook; a bare tuple key is not a column list in pandas.
df1 = df[["rowindex", "bool_col"]]
df1

   rowindex  bool_col
0         1     False
1         0      True
2         2      True

[3 rows x 2 columns]

In [14]:
# Right-hand side of the joins below: the key column plus string_col,
# limited to two rows so that one key has no match on this side.
# Columns are selected with a list of labels (df[[...]]), matching the other
# selections in this notebook; a bare tuple key is not a column list in pandas.
df2 = df[["rowindex", "string_col"]].head(2)
df2

   rowindex     string_col
0         1          こんにちは
1         0  Hello, World!

[2 rows x 2 columns]

In [15]:
# Inner join: only rowindex values present in both frames are kept.
df1.merge(df2, on="rowindex", how="inner")

   rowindex  bool_col     string_col
0         1     False          こんにちは
1         0      True  Hello, World!

[2 rows x 3 columns]

In [16]:
# Outer join: every rowindex from either frame is kept; string_col is None
# where df2 has no matching row.
df1.merge(df2, on="rowindex", how="outer")

   rowindex  bool_col     string_col
0         0      True  Hello, World!
1         2      True           None
2         1     False          こんにちは

[3 rows x 3 columns]

In [17]:
# Left join: all rows of df1 are kept; string_col is None where df2 has no
# matching row.
df1.merge(df2, on="rowindex", how="left")

   rowindex  bool_col     string_col
0         1     False          こんにちは
1         0      True  Hello, World!
2         2      True           None

[3 rows x 3 columns]

In [18]:
# Right join: all rows of df2 are kept, with bool_col filled in from df1.
df1.merge(df2, on="rowindex", how="right")

   rowindex  bool_col     string_col
0         1     False          こんにちは
1         0      True  Hello, World!

[2 rows x 3 columns]

### Concat two DFs

In [19]:
# Stack df on top of itself; the result has twice as many rows.
bf.concat([df, df])

   bool_col   int64_col  int64_too  rowindex     string_col
0     False  -987654321          1         1          こんにちは
1      True   123456789          0         0  Hello, World!
2      True      314159          0         2   ¡Hola Mundo!
3     False  -987654321          1         1          こんにちは
4      True   123456789          0         0  Hello, World!
5      True      314159          0         2   ¡Hola Mundo!

[6 rows x 5 columns]

### Access column through property

In [20]:
# Attribute-style access to the bool_col column.
df.bool_col

0    False
1     True
2     True
Name: bool_col, dtype: boolean

### Retrieve SQL

In [21]:
# Inspect the SQL generated for the inner join of df1 and df2.
inner_join = df1.merge(df2, on="rowindex", how="inner")
print(inner_join.sql)

WITH t0 AS (
  SELECT t8.`rowindex` AS `rowindex`, t8.`string_col` AS `string_col`,
         ((((t8.`bool_col` IS NOT NULL) AND (t8.`int64_col` IS NOT NULL)) AND (t8.`int64_too` IS NOT NULL)) AND (t8.`rowindex` IS NOT NULL)) AND (t8.`string_col` IS NOT NULL) AS `bigframes_predicate`
  FROM `garrettwu-test-project-2.bigframes.scalars` t8
),
t1 AS (
  SELECT t0.*
  FROM t0
  WHERE t0.`bigframes_predicate`
),
t2 AS (
  SELECT t8.`rowindex` AS `rowindex`, t8.`bool_col` AS `bool_col`,
         ((((t8.`bool_col` IS NOT NULL) AND (t8.`int64_col` IS NOT NULL)) AND (t8.`int64_too` IS NOT NULL)) AND (t8.`rowindex` IS NOT NULL)) AND (t8.`string_col` IS NOT NULL) AS `bigframes_predicate`
  FROM `garrettwu-test-project-2.bigframes.scalars` t8
),
t3 AS (
  SELECT t2.*
  FROM t2
  WHERE t2.`bigframes_predicate`
),
t4 AS (
  SELECT t8.*
  FROM (
    SELECT t1.`rowindex`, t1.`string_col`
    FROM t1
  ) t8
  LIMIT 2
),
t5 AS (
  SELECT t3.`rowindex`, t3.`bool_col`
  FROM t3
),
t6 AS (
  SELECT t4.`rowi

### Special Column Names

In [22]:
# Column names may contain spaces.
df.rename(columns={"int64_too": "int64    col"})

   bool_col   int64_col  int64    col  rowindex     string_col
0     False  -987654321             1         1          こんにちは
1      True   123456789             0         0  Hello, World!
2      True      314159             0         2   ¡Hola Mundo!

[3 rows x 5 columns]

In [23]:
# Column names may contain punctuation characters.
df.rename(columns={"int64_too": "int64!@#$%col"})

   bool_col   int64_col  int64!@#$%col  rowindex     string_col
0     False  -987654321              1         1          こんにちは
1      True   123456789              0         0  Hello, World!
2      True      314159              0         2   ¡Hola Mundo!

[3 rows x 5 columns]

In [24]:
# Renaming onto an existing name leaves two columns called int64_col.
df3 = df.rename(columns={"int64_too": "int64_col"})
df3

   bool_col   int64_col  int64_col  rowindex     string_col
0     False  -987654321          1         1          こんにちは
1      True   123456789          0         0  Hello, World!
2      True      314159          0         2   ¡Hola Mundo!

[3 rows x 5 columns]

In [25]:
# Selecting the duplicated label returns both columns that carry it.
df3["int64_col"]

    int64_col  int64_col
0  -987654321          1
1   123456789          0
2      314159          0

[3 rows x 2 columns]

### Binary Operation

In [26]:
# Work on the two integer columns only.
df4 = df[["int64_col", "int64_too"]]
df4

    int64_col  int64_too
0  -987654321          1
1   123456789          0
2      314159          0

[3 rows x 2 columns]

In [27]:
# Adding a scalar applies the addition to every element.
df4 + 1

    int64_col  int64_too
0  -987654320          2
1   123456790          1
2      314160          1

[3 rows x 2 columns]