## Packages

From the terminal I ran:

```
pipenv install
pipenv install jupyter pymysql sqlalchemy requests
```


In [1]:
import pymysql.cursors
import requests
from datetime import datetime
from sqlalchemy import create_engine
import hashlib 
import os
import json
import time
from collections import defaultdict
from flask import jsonify
import re
import pandas as pd
import s3fs
import boto3

# Static connection settings for the MySQL database.
dbname = "collab"
port = 3306

# Host and credentials come from the environment; a missing variable
# raises KeyError right here rather than at connection time.
host = os.environ["DBHOST"]
user = os.environ["DBUSER"]
password = os.environ["DBPASSWORD"]

In [2]:
#### Main function ####

def handler(incoming):
    """Placeholder entry point.

    Args:
        incoming: Payload passed by the caller; currently unused.

    Returns:
        Always ``True``.
    """
    # TODO: put code here
    return True

In [3]:
########

In [4]:
# create sqlalchemy engine
from urllib.parse import quote_plus

# User and password are URL-escaped so characters such as "@", ":" or "/"
# cannot corrupt the connection URL, and the configured `port` is passed
# explicitly instead of being silently ignored.
engine = create_engine(
    "mysql+pymysql://{user}:{pw}@{host}:{port}/{db}".format(
        user=quote_plus(user),
        pw=quote_plus(password),
        host=host,
        port=port,
        db=dbname,
    )
)

In [5]:
# Get the data we need
# The context manager closes the connection even when one of the reads
# raises, so a failed query no longer leaks the connection.
with engine.connect() as dbConnection:
    # Long-format key/value rows, one per (identifier, item_key).
    df = pd.read_sql("SELECT `identifier`, `item_key`, `item_value` FROM `data_pieces`", dbConnection)
    columns_df = pd.read_sql("SELECT * FROM `column_tracker`", dbConnection)
    text_df = pd.read_sql("SELECT * FROM `text_log`", dbConnection)
    first_contact_df = pd.read_sql("SELECT * FROM `first_contact`", dbConnection)

In [6]:
# Reshape the long key/value rows: one row per identifier, one column per
# item_key. Passing `values` as a list yields two-level column labels.
pivoted = df.pivot(
    index="identifier",
    columns="item_key",
    values=["item_value"],
)

In [7]:
# pivoted

In [8]:
# Keep only the second column level (the item_key names), then turn the
# identifier index back into a regular column.
pivoted.columns = pivoted.columns.get_level_values(1)
pivoted = pivoted.reset_index()

In [9]:
# pivoted

In [10]:
# pivoted.columns

In [11]:
# Order the tracked columns by creation time.
columns_df = columns_df.sort_values(by=["created_at"])

In [12]:
# Column names in the order established by columns_df.
ordered_columns = columns_df["col"].to_list()

In [13]:
# Put the identifier column first.
ordered_columns[:0] = ["identifier"]

In [14]:
# ordered_columns

In [15]:
# Select the columns in the tracked order (KeyError if one is missing).
pivoted_ordered = pivoted.loc[:, ordered_columns]

In [16]:
# pivoted_ordered

In [17]:
# text_df

In [18]:
# Collapse every raw_text entry of an identifier into a single
# " | "-separated string, one row per identifier.
text_concat = (
    text_df
    .groupby(["identifier"])["raw_text"]
    .apply(" | ".join)
    .reset_index()
)



In [19]:
# Attach the concatenated text to the pivoted data. The default join is
# inner, so identifiers missing from either side are dropped.
merge1 = pivoted_ordered.merge(text_concat, on="identifier")

In [20]:
# merge1

In [21]:
# Order first contacts by creation time; the merge below keeps this order.
first_contact_df = first_contact_df.sort_values(by=["created_at"])

In [22]:
# Inner-join the first-contact rows with the merged data on identifier.
final_table = first_contact_df.merge(merge1, on="identifier")

In [23]:
# final_table

In [24]:
# Write the final table next to the notebook as "<dbname>.csv", without
# the DataFrame index.
csv_file = dbname + ".csv"
final_table.to_csv(csv_file, index=False)

## This Google Sheets version requires user authentication

In [1]:
import pickle
from googleapiclient.discovery import build

# Spreadsheet id: copy it from the sheet's link in the browser.
SPREADSHEET_ID = "1M3kmYJu3ZTQ7eyCFJ9iSY0uqRfZqQh_5H93TL_fyqtM"
# Title of the worksheet (tab) that receives the data.
worksheet_name = "Sheet1"
# CSV file to upload.
path_to_csv = "./collab.csv"
# Pickled credentials used to build the API client.
path_to_credentials = "./secrets/token.pickle"


# convenience routines
def find_sheet_id_by_name(sheet_name):
    """Return the ``sheetId`` of the worksheet titled *sheet_name*.

    Queries the spreadsheet identified by ``SPREADSHEET_ID`` through the
    module-level ``API`` client, requesting only each sheet's properties.

    Args:
        sheet_name: Worksheet (tab) title to look for; compared exactly.

    Returns:
        The ``sheetId`` of the first sheet whose title equals *sheet_name*,
        or ``None`` when no sheet matches.
    """
    response = (
        API.spreadsheets()
        .get(spreadsheetId=SPREADSHEET_ID, fields='sheets.properties')
        .execute()
    )

    # A response without a "sheets" entry is treated as "no sheets" instead
    # of failing with a TypeError while iterating over None.
    for sheet in response.get('sheets') or []:
        properties = sheet['properties']
        if 'title' in properties and properties['title'] == sheet_name:
            return properties['sheetId']

    return None


def push_csv_to_gsheet(csv_path, sheet_id):
    """Paste the contents of a CSV file into a worksheet.

    The whole file is sent as one ``pasteData`` request anchored at row 0,
    column 0 of the sheet, using the module-level ``API`` client and
    ``SPREADSHEET_ID``.

    Args:
        csv_path: Path of the CSV file to upload.
        sheet_id: ``sheetId`` of the target worksheet.

    Returns:
        The response returned by executing the ``batchUpdate`` request.
    """
    # Read as UTF-8 explicitly: pandas writes CSV files as UTF-8 by default,
    # while the platform default encoding may differ (e.g. on Windows).
    with open(csv_path, 'r', encoding='utf-8') as csv_file:
        csv_contents = csv_file.read()

    body = {
        'requests': [{
            'pasteData': {
                "coordinate": {
                    "sheetId": sheet_id,
                    "rowIndex": "0",  # adapt this if you need different positioning
                    "columnIndex": "0",  # adapt this if you need different positioning
                },
                "data": csv_contents,
                "type": 'PASTE_NORMAL',
                "delimiter": ',',
            }
        }]
    }
    request = API.spreadsheets().batchUpdate(spreadsheetId=SPREADSHEET_ID, body=body)
    return request.execute()


# upload
# NOTE(review): pickle.load can execute arbitrary code embedded in the
# file; only load a token file you created yourself.
with open(path_to_credentials, 'rb') as token_file:
    credentials = pickle.load(token_file)

# Sheets v4 client used by the helper functions above.
API = build('sheets', 'v4', credentials=credentials)

# Resolve the worksheet by title, then paste the CSV into it.
target_sheet_id = find_sheet_id_by_name(worksheet_name)
push_csv_to_gsheet(csv_path=path_to_csv, sheet_id=target_sheet_id)

{'spreadsheetId': '1M3kmYJu3ZTQ7eyCFJ9iSY0uqRfZqQh_5H93TL_fyqtM',
 'replies': [{}]}

## Trying a service account

In [25]:
from gspread_pandas import Spread, Client

In [35]:
# Open the target spreadsheet (the argument is presumably its id — confirm).
spread = Spread("1uHvY_Z0lpGdvAgkfTt-sNXMe5yGOiq0NZJju0aLej1E")

In [38]:
# Write the final table to "Sheet1" starting at A1, without the DataFrame
# index, with replace=True.
spread.df_to_sheet(
    final_table,
    sheet="Sheet1",
    start="A1",
    index=False,
    replace=True,
)