In [None]:
!pip install pyarrow
!pip install google-cloud-bigquery

In [None]:
import os

import psycopg2
import pandas as pd

from google.cloud import bigquery
from google.oauth2 import service_account

### Extracting data from PostgreSQL

In [None]:
# Open a connection to the `dvdrental` database on the local PostgreSQL
# server. No password is passed explicitly.
conn = psycopg2.connect(
    host='localhost',
    port=5432,
    database='dvdrental',
    user='postgres',
)

In [None]:
# Pull the whole `public.rental` table into a DataFrame.
# `read_sql_query` runs the statement through `conn` directly, so no separate
# cursor has to be opened (and none can be left dangling if the read fails).
table = pd.read_sql_query('SELECT * FROM public.rental', conn)

In [None]:
# Release the PostgreSQL connection; the extracted rows are already held in
# the `table` DataFrame.
conn.close()

In [None]:
# Inspect the column dtypes of the extracted frame; the transform step below
# uses the `.dt` accessor on `rental_date`.
table.dtypes

### Transforming data

In [None]:
# Count rentals per calendar day: group on the date part of `rental_date`,
# count `rental_id` within each group, then flatten the result back into two
# columns (`rental_date`, `count`) ordered by date.
grouping = (
    table
    .groupby(table['rental_date'].dt.date)
    .agg({'rental_id': 'count'})
    .reset_index()
    .rename(columns={'rental_id': 'count'})
    .sort_values(by='rental_date')
)

In [None]:
# Display the per-day rental counts produced by the transform step.
grouping

### Loading data to BigQuery

In [None]:
# Build service-account credentials for BigQuery.
# The key file location is taken from GOOGLE_APPLICATION_CREDENTIALS when that
# variable is set, so the notebook is not tied to one machine's home
# directory; the local key path is kept as the default.
credentials = service_account.Credentials.from_service_account_file(
    os.environ.get(
        'GOOGLE_APPLICATION_CREDENTIALS',
        '/Users/fabian/Downloads/bigquery-databases2.json',
    ),
    scopes=['https://www.googleapis.com/auth/cloud-platform'],
)

In [None]:
# Create the BigQuery client, bound to the project recorded on the
# service-account credentials.
client = bigquery.Client(
    project=credentials.project_id,
    credentials=credentials,
)

In [None]:
# Load-job settings for uploading the aggregated frame.
job_config = bigquery.LoadJobConfig()

# Explicit destination schema, one field per DataFrame column.
# Supported datatypes: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
job_config.schema = [
    bigquery.SchemaField('rental_date', bigquery.enums.SqlTypeNames.DATE),
    bigquery.SchemaField('count', bigquery.enums.SqlTypeNames.INT64),
]

# WRITE_TRUNCATE: overwrite the destination table's data if it already exists.
job_config.write_disposition = 'WRITE_TRUNCATE'

In [None]:
# Start the load job that writes `grouping` to `databases2.grouping`, then
# wait for it to finish. `.result()` stays the cell's last expression so its
# value is still displayed.
job = client.load_table_from_dataframe(
    dataframe=grouping,
    destination='databases2.grouping',
    job_config=job_config,
)
job.result()

In [None]:
# Read back the destination table's metadata to confirm the load.
# The result is kept under its own name: rebinding `table` here would replace
# the rental DataFrame from the extract step, so re-running the dtype or
# transform cells after this one would fail.
table_id = 'databases2.grouping'
loaded_table = client.get_table(table_id)
print(
    'Loaded {} rows and {} columns to {}'.format(
        loaded_table.num_rows, len(loaded_table.schema), table_id
    )
)