In [None]:
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

In [None]:
def get_credentials(scopes=None, token_path='token.pickle',
                    credentials_path='credentials.json'):
    """Return Google OAuth credentials, reusing a cached token when possible.

    Args:
        scopes: OAuth scopes to request when the interactive flow has to run.
            Defaults to read-only access to Google Sheets.
        token_path: Pickle file used to cache the credentials between runs.
        credentials_path: OAuth client-secrets JSON used by the interactive
            flow.

    Returns:
        The credentials object: the cached one if it is still valid,
        otherwise a refreshed or newly authorized one (which is also written
        back to ``token_path``).
    """
    if scopes is None:
        scopes = ['https://www.googleapis.com/auth/spreadsheets.readonly']

    creds = None
    # The token file stores the user's access and refresh tokens and is
    # created automatically the first time the authorization flow completes.
    # NOTE(security): unpickling can execute arbitrary code, so token_path
    # must only ever point at a file this function wrote itself.
    if os.path.exists(token_path):
        with open(token_path, 'rb') as token:
            creds = pickle.load(token)

    # Cached credentials that are still valid need no network round trip.
    if creds and creds.valid:
        return creds

    if creds and creds.expired and creds.refresh_token:
        # Expired but refreshable: renew silently, no user interaction.
        creds.refresh(Request())
    else:
        # Nothing usable is cached: run the interactive authorization flow.
        flow = InstalledAppFlow.from_client_secrets_file(
            credentials_path, scopes)
        creds = flow.run_local_server(port=0)

    # Save the credentials for the next run.
    with open(token_path, 'wb') as token:
        pickle.dump(creds, token)

    return creds

In [None]:
def get_sheet_name_and_id(service, spreadsheetId):
    """Fetch a spreadsheet's metadata and return its id together with its title.

    Args:
        service: Sheets API client exposing ``spreadsheets().get(...)``.
        spreadsheetId: Id of the spreadsheet to look up.

    Returns:
        A dict with keys ``'id'`` (the id passed in) and ``'title'`` (read
        from ``properties.title`` of the metadata response).
    """
    request = service.spreadsheets().get(spreadsheetId=spreadsheetId)
    metadata = request.execute()
    title = metadata['properties']['title']
    return dict(id=spreadsheetId, title=title)

In [None]:
def get_sheet_data(service, spreadsheet_id, spreadsheet_range):
    """Read a range from a spreadsheet and split it into header and data rows.

    Args:
        service: Sheets API client exposing ``spreadsheets().values().get(...)``.
        spreadsheet_id: Id of the spreadsheet to read.
        spreadsheet_range: Range to fetch, in A1 notation.

    Returns:
        A ``(header, rows)`` tuple: the first returned row and a list of all
        remaining rows. When the response carries no values, both are empty
        lists.
    """
    response = service.spreadsheets().values().get(
        spreadsheetId=spreadsheet_id,
        range=spreadsheet_range
    ).execute()

    # The response has no 'values' key when the requested range is empty;
    # indexing the result of .get('values') directly would then raise.
    values = response.get('values') or []
    if not values:
        return [], []
    return values[0], values[1:]

In [None]:
# Id of the Google spreadsheet this notebook reads; passed to both Sheets API helpers.
SPREADSHEET_ID = '1lgyVuw6nVyRnmKtCPbXF4kYcop5HMJ8H3eeNsArAlVk'

In [None]:
# A1-notation range: columns A through G of the 'Form Masked' sheet, from row 1 down.
RANGE_NAME = 'Form Masked!A1:G'

In [None]:
# Obtain OAuth credentials; this may open a browser window if no valid cached token exists.
creds = get_credentials()

In [None]:
# Build a Google Sheets API v4 client authorized with the credentials above.
service = build('sheets', 'v4', credentials=creds)

In [None]:
# Dict with the spreadsheet's 'id' and 'title'; both are attached to every row further down.
sheet_metadata = get_sheet_name_and_id(service, SPREADSHEET_ID)

In [None]:
# Display the id/title dict to confirm the right spreadsheet was reached.
sheet_metadata

In [None]:
# sheet_columns is the first row of the range (the header); sheet_rows is every row after it.
sheet_columns, sheet_rows = get_sheet_data(service, SPREADSHEET_ID, RANGE_NAME)

In [None]:
# List the header cells, one per line.
for column in sheet_columns:
    print(column)

In [None]:
# Peek at the first three data rows.
for row in sheet_rows[:3]:
    print(row)

In [None]:
import pandas as pd

# Load the data rows into a DataFrame, using the sheet's header row as column labels.
sheet_df = pd.DataFrame(sheet_rows, columns=sheet_columns)

In [None]:
# Keep only the first five columns; every column from position 5 onward is discarded.
trailing_columns = sheet_df.columns[5:]
sheet_df = sheet_df.drop(columns=trailing_columns)

In [None]:
# Show the column labels that remain after the drop.
sheet_df.columns

In [None]:
# The 'ITVersity Id' column is not part of the load, so remove it.
sheet_df = sheet_df.drop(columns='ITVersity Id')

In [None]:
# Relabel the remaining columns positionally with database-style names.
# NOTE(review): assumes exactly four columns remain, in this order — verify against the sheet.
sheet_df.columns = ['submitted_ts', 'email_id', 'first_name', 'last_name']

In [None]:
# Stamp every row with the spreadsheet's id and title (same value on each row).
sheet_df['form_id'] = sheet_metadata['id']
sheet_df['form_title'] = sheet_metadata['title']

In [None]:
# Preview the first three rows of the prepared frame.
sheet_df.head(3)

In [None]:
# Column order here must line up with the placeholders of the INSERT statement.
submission_columns = ['email_id', 'form_id', 'form_title', 'submitted_ts']
form_submissions = sheet_df[submission_columns].values.tolist()

In [None]:
# Preview the first three records that will be inserted.
form_submissions[:3]

In [None]:
# Parameterized INSERT into the staging table; the four %s placeholders are
# filled per record by cursor.executemany, in the listed column order.
query = ("""INSERT INTO form_submissions_stg
         (email_id, form_id, form_title, submitted_ts)
         VALUES
         (%s, %s, %s, %s)""")

In [None]:
import psycopg2

def get_connection(host, port, database, user, password):
    """Open and return a new psycopg2 connection.

    Args:
        host: Database server host name.
        port: Database server port.
        database: Name of the database to connect to.
        user: User name to authenticate as.
        password: Password for ``user``.

    Returns:
        The connection object returned by ``psycopg2.connect``.

    Raises:
        Any exception raised by ``psycopg2.connect`` propagates unchanged.
    """
    # No try/except here: catching an exception only to re-raise it adds
    # nothing, and a failed connect leaves no connection object to return.
    return psycopg2.connect(
        host=host,
        port=port,
        database=database,
        user=user,
        password=password
    )

In [None]:
def get_cursor(connection):
    """Create a new cursor on ``connection`` and hand it back to the caller."""
    new_cursor = connection.cursor()
    return new_cursor

In [None]:
def load_data(connection, cursor, query, data, batch_size=100):
    """Insert ``data`` through ``cursor`` in batches, committing after each one.

    Args:
        connection: Connection owning ``cursor``; used to commit and roll back.
        cursor: Cursor whose ``executemany`` runs ``query``.
        query: Parameterized SQL statement executed once per record.
        data: Iterable of records; each record is converted to a tuple.
        batch_size: Number of records sent per ``executemany`` call and
            commit. Defaults to 100.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        Exception: Any error from ``executemany`` or ``commit`` is re-raised
            after the current transaction has been rolled back. Batches that
            were committed earlier stay committed.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')

    def _flush(batch):
        # Roll back on failure so the connection is not left in an aborted
        # transaction that would make every later statement fail.
        try:
            cursor.executemany(query, batch)
            connection.commit()
        except Exception:
            connection.rollback()
            raise

    batch = []
    for rec in data:
        batch.append(tuple(rec))
        if len(batch) >= batch_size:
            _flush(batch)
            batch = []

    # Only flush a remainder that actually holds records; this avoids an
    # empty executemany and commit when the input divides evenly or is empty.
    if batch:
        _flush(batch)

In [None]:
# Connection used by load_data to write into the staging table.
# NOTE(review): the user name and password are hard-coded here (and again in
# the DATABASE_URL cell); consider sourcing them from the environment, and
# keep both cells pointing at the same database.
connection = get_connection(
    host='localhost', 
    port='5432', 
    database='itversity_sms_db', 
    user='itversity_sms_user', 
    password='sms_password'
)

In [None]:
# Cursor on the psycopg2 connection; load_data runs its batched inserts through it.
cursor = get_cursor(connection)

In [None]:
# Load the SQL magic extension so the %%sql cells below can run.
%load_ext sql

In [None]:
# Set DATABASE_URL for the %%sql cells; it names the same host, database and user as the psycopg2 connection.
# NOTE(review): the password is embedded in this URL and will be echoed in the cell output.
%env DATABASE_URL=postgresql://itversity_sms_user:sms_password@localhost:5432/itversity_sms_db

In [None]:
%%sql

-- Create the empty staging table with the column types of the selected columns.
-- IF NOT EXISTS keeps this cell re-runnable: without it, running the notebook
-- a second time fails because the table is already there.
-- The WHERE 1 = 2 predicate is never true, so only the structure is copied.
CREATE TABLE IF NOT EXISTS form_submissions_stg
AS
SELECT u.email_id, fs.form_id, fs.form_title, fs.submitted_ts
FROM form_submissions AS fs
    JOIN users AS u
        ON fs.user_id = u.user_id
WHERE 1 = 2

In [None]:
%%sql

-- Index the staging table on email_id, the column it is joined to users on.
-- IF NOT EXISTS keeps this cell re-runnable once the index has been created.
CREATE INDEX IF NOT EXISTS form_submissions_stg_email_id_idx
ON form_submissions_stg(email_id)

In [None]:
%%sql

-- Preview a few rows of users, the table staged emails are matched against.
-- NOTE(review): the output may contain real user data; clear it before sharing the notebook.
SELECT * FROM users LIMIT 10

In [None]:
# Insert the sheet records into form_submissions_stg in committed batches.
load_data(connection, cursor, query, form_submissions)

In [None]:
%%sql

-- Spot-check the rows that were just loaded into the staging table.
SELECT * FROM form_submissions_stg LIMIT 10

In [None]:
%%sql

-- Copy staged submissions into form_submissions, replacing each staged
-- email_id with the matching users.user_id.
-- This is an inner join, so staged rows whose email_id has no match in users
-- are not inserted.
-- NOTE(review): re-running this cell before the staging table is truncated
-- inserts the same rows again.
INSERT INTO form_submissions (user_id, form_id, form_title, submitted_ts)
SELECT u.user_id, fss.form_id, fss.form_title, fss.submitted_ts
FROM users AS u
    JOIN form_submissions_stg fss
        ON u.email_id = fss.email_id

In [None]:
%%sql

-- Empty the staging table now that its rows have been copied to form_submissions.
TRUNCATE TABLE form_submissions_stg

In [None]:
%%sql

-- Preview the target table after the insert from staging.
SELECT * FROM form_submissions LIMIT 10