In [1]:
!pip install pyarrow
!pip install google-cloud-bigquery

You should consider upgrading via the '/Users/fabian/anaconda3/bin/python -m pip install --upgrade pip' command.
You should consider upgrading via the '/Users/fabian/anaconda3/bin/python -m pip install --upgrade pip' command.


In [2]:
import os

import pandas as pd
import psycopg2

from google.cloud import bigquery
from google.oauth2 import service_account

### Extracting data from PostgreSQL

In [3]:
# Connect to the local `dvdrental` PostgreSQL database as the `postgres` user.
conn = psycopg2.connect(
    host = 'localhost',
    port = 5432,
    database = 'dvdrental',
    user = 'postgres',
)

In [5]:
# Pull the whole `public.rental` table into a DataFrame. The connection is handed
# to pandas directly, so no separately opened cursor is needed here.
table = pd.read_sql_query( 'SELECT * FROM public.rental', conn )

In [6]:
# Release the PostgreSQL connection once the extraction query has been run.
conn.close()

In [9]:
# (rows, columns) of the extracted rental DataFrame.
table.shape

(16044, 7)

In [7]:
# Column dtypes of the extracted rental DataFrame.
table.dtypes

rental_id                int64
rental_date     datetime64[ns]
inventory_id             int64
customer_id              int64
return_date     datetime64[ns]
staff_id                 int64
last_update     datetime64[ns]
dtype: object

In [8]:
# Preview the first rows of the extracted rental DataFrame.
table.head()

Unnamed: 0,rental_id,rental_date,inventory_id,customer_id,return_date,staff_id,last_update
0,2,2005-05-24 22:54:33,1525,459,2005-05-28 19:40:33,1,2006-02-16 02:30:53
1,3,2005-05-24 23:03:39,1711,408,2005-06-01 22:12:39,1,2006-02-16 02:30:53
2,4,2005-05-24 23:04:41,2452,333,2005-06-03 01:43:41,2,2006-02-16 02:30:53
3,5,2005-05-24 23:05:21,2079,222,2005-06-02 04:33:21,1,2006-02-16 02:30:53
4,6,2005-05-24 23:08:07,2792,549,2005-05-27 01:32:07,1,2006-02-16 02:30:53


### Transforming data

In [14]:
# Number of rentals per calendar day: group on the date part of `rental_date`,
# count `rental_id` within each day, and expose the result as the two columns
# `rental_date` and `count`, ordered by date.
rental_day = table[ 'rental_date' ].dt.date
grouping = (
    table.groupby( rental_day )
    .agg( { 'rental_id': 'count' } )
    .reset_index()
    .rename( columns = { 'rental_id': 'count' } )
    .sort_values( by = 'rental_date' )
)

In [15]:
# Display the per-day rental counts.
grouping

Unnamed: 0,rental_date,count
0,2005-05-24,8
1,2005-05-25,137
2,2005-05-26,174
3,2005-05-27,166
4,2005-05-28,196
5,2005-05-29,154
6,2005-05-30,158
7,2005-05-31,163
8,2005-06-14,16
9,2005-06-15,348


### Loading data to BigQuery

In [16]:
# Location of the service-account key file. It is read from the
# GOOGLE_APPLICATION_CREDENTIALS environment variable so the notebook is not tied
# to one machine; when the variable is unset, the original local path is used.
key_path = os.environ.get(
    'GOOGLE_APPLICATION_CREDENTIALS',
    '/Users/fabian/Downloads/psychic-light-314420-54078d755af0.json',
)

# Build credentials from the key file, scoped to the Cloud Platform API.
credentials = service_account.Credentials.from_service_account_file(
    key_path,
    scopes = [ 'https://www.googleapis.com/auth/cloud-platform' ],
)

In [18]:
# BigQuery client using the service-account credentials; the project id is taken
# from the credentials object itself.
client = bigquery.Client(
    project = credentials.project_id,
    credentials = credentials,
)

In [19]:
# Explicit schema for the destination table, matching the two columns of `grouping`.
# Supported datatypes: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
grouping_schema = [
    bigquery.SchemaField( 'rental_date', bigquery.enums.SqlTypeNames.DATE ),
    bigquery.SchemaField( 'count', bigquery.enums.SqlTypeNames.INT64 ),
]

job_config = bigquery.LoadJobConfig(
    schema = grouping_schema,
    # If the destination table already exists, replace its contents.
    write_disposition = 'WRITE_TRUNCATE',
)

In [20]:
# Start a load job that uploads the per-day counts to the `databases2.grouping` table.
job = client.load_table_from_dataframe(
    dataframe = grouping,
    destination = 'databases2.grouping',
    job_config = job_config,
)
# Wait for the load job to complete.
job.result()

<google.cloud.bigquery.job.load.LoadJob at 0x7fd94792ee10>

In [21]:
# Confirm the load by fetching the destination table and reporting its size.
# The result is bound to its own name instead of `table`, so the rental DataFrame
# extracted earlier is not overwritten and the transform cell stays re-runnable.
table_id = 'databases2.grouping'
loaded_table = client.get_table( table_id )
print(
    'Loaded {} rows and {} columns to {}'.format(
        loaded_table.num_rows, len( loaded_table.schema ), table_id
    )
)

Loaded 41 rows and 2 columns to databases2.grouping
