In [None]:
from __future__ import print_function
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import pandas as pd

In [None]:
# Notebook configuration.
# Search the code for 'ToDO' first: each marker flags a string that may need
# to be changed before running against a different spreadsheet.

# OAuth scopes requested from Google.
# If modifying these scopes, delete the file token.pickle.
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']

# Spreadsheet ID and the A1 ranges used for reading and writing. # ToDO: All info
SPREADSHEET_ID = '1IdPyKhkjEE_txt8iZ8WwHDhvTUF3JO3cHRF8McZkLYw'
RANGE_NAME_READ = 'Summary!A:J'
RANGE_NAME_WRITE = 'Committees!A:B'

# How written values are interpreted:
#   RAW          -> stored as plain strings
#   USER_ENTERED -> interpreted dynamically
VALUE_INPUT_OPTION = 'USER_ENTERED'

# Column labels of the range named by RANGE_NAME_READ. ToDO: All Columns
columns = [
    'Bill Number',
    'Date Introduced',
    'Bill Title',
    'Summary of Bill',
    'Assigned Committee(s)',
    'Hearing(s)',
    'Passed(S/H)',
    'Bill Sponsor',
    'Co-sponsors',
    'Keywords',
]

# Columns to be excluded from the dataframe: everything except the two
# labels named here, kept in the same order as `columns`. ToDO: All columns
exclude = [label for label in columns
           if label not in ('Bill Number', 'Assigned Committee(s)')]

In [None]:
# Authenticate against Google and build the Sheets service client.

# token.pickle caches the user's access and refresh tokens; it is written
# below once the authorization flow has completed.
# NOTE(review): pickle.load executes whatever the file contains, so
# token.pickle must only ever come from this notebook's own earlier run.
creds = None
if os.path.exists('token.pickle'):
    with open('token.pickle', 'rb') as token_file:
        creds = pickle.load(token_file)

# No usable credentials yet: refresh an expired token when a refresh token is
# available, otherwise send the user through the login flow.
if not (creds and creds.valid):
    refreshable = creds and creds.expired and creds.refresh_token
    if refreshable:
        creds.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file('credentials.json',
                                                         SCOPES)
        creds = flow.run_local_server(port=0)
    # Persist the new or refreshed credentials for the next run.
    with open('token.pickle', 'wb') as token_file:
        pickle.dump(creds, token_file)

service = build('sheets', 'v4', credentials=creds)

In [None]:
# Call the Sheets API and load the returned rows into a DataFrame indexed by
# 'Bill Number'. Columns listed in `exclude` are left out of the frame.
sheet = service.spreadsheets()
result = sheet.values().get(spreadsheetId=SPREADSHEET_ID,
                            range=RANGE_NAME_READ).execute()
records = pd.DataFrame.from_records(result.get('values', []),
                                    columns=columns,
                                    exclude=exclude,
                                    index='Bill Number')  # ToDO: Bill Number

# Drop the row whose index label is 'Bill Number' (presumably the sheet's own
# header row, which comes back as data). errors='ignore' keeps this from
# raising KeyError when the range is empty or has no such row, so the
# "No data found." check in the next cell can actually be reached.
records = records.drop(['Bill Number'], errors='ignore')  # ToDO: Bill Number

In [None]:
# Show what was read so it can be checked by eye; an empty frame is reported
# with a short message instead.
print('No data found.' if records.empty else records)


In [None]:
# Expand each bill into one (Bill Number, Committee) row per committee.
# The 'Assigned Committee(s)' cell is split on ';' and every piece is paired
# with the bill's index value.
#
# Rows are collected in a list and the DataFrame is built once at the end:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration.
committee_rows = []
for bill_number, cell in records['Assigned Committee(s)'].items():  # ToDO: Assigned Committee(s)
    # A non-string cell (for example a missing value) has no .split();
    # treat it like an empty cell rather than crashing.
    if not isinstance(cell, str):
        cell = ''
    for word in cell.split(';'):
        committee_rows.append({'Bill Number': bill_number, 'Committee': word})  # ToDO: Bill Number, Committee

# Passing `columns` keeps both headers present even when there are no rows.
data = pd.DataFrame(committee_rows, columns=['Bill Number', 'Committee'])  # ToDO: Bill Number, Committee

In [None]:
# Clean the expanded data: remove leading and trailing whitespace from every
# committee name.
# The values are re-wrapped in a fresh Series (default integer index), run
# through Series.str.strip(), and assigned back to the 'Committee' column.
stripped = pd.Series(data['Committee'].values).str.strip()
data = data.assign(Committee=stripped)

In [None]:
# Write the (Bill Number, Committee) pairs back to the Google sheet.

# Title row goes first, followed by the frame's rows as plain lists.
# NOTE(review): `data` is rebound from a DataFrame to a list here, so this
# cell cannot be re-run on its own without re-running the cells above it.
header = ('Bill Number', 'Committee') # TODO: Bill Number, Committee
data = [header] + data.values.tolist()

# Request body expected by values().update()
body = dict(values=data)

# Send the rows to RANGE_NAME_WRITE using the configured input option.
# NOTE(review): presumably rows left in the sheet by an earlier, longer run
# are not cleared by update() -- verify whether that matters here.
update_request = sheet.values().update(spreadsheetId=SPREADSHEET_ID,
                                       range=RANGE_NAME_WRITE,
                                       valueInputOption=VALUE_INPUT_OPTION,
                                       body=body)
result = update_request.execute()

print('{0} cells updated.'.format(result.get('updatedCells')))


print('\ndone')