In [None]:
from __future__ import print_function
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import pandas as pd

In [None]:
# OAuth scope requested for the Sheets API.
# token.pickle caches credentials for these scopes, so delete that file
# whenever the list changes.
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']

# Spreadsheet to work on, the range that is read and the range that is written.
SPREADSHEET_ID = '1DIt56sfquhMC5W4CvfxZ2tavSDCVv9UP0uvqXg7OzNE'
RANGE_NAME_READ = 'FullList!A:H'
RANGE_NAME_WRITE = 'Keywords!A:B'

# How written values are interpreted by the sheet:
#   RAW          -> input as a string
#   USER_ENTERED -> input dynamically
VALUE_INPUT_OPTION = 'USER_ENTERED'

# Labels for the columns returned by RANGE_NAME_READ, in sheet order.
columns = ['ID', 'Title', 'Authors', 'Publisher', 'Year', 'States', 'Agency', 'KeyWords']
# Everything except the ID and the keywords is left out of the dataframe.
exclude = [label for label in columns if label not in ('ID', 'KeyWords')]


In [None]:
"""Shows basic usage of the Sheets API.
Prints values from a sample spreadsheet.
"""
# Obtain credentials for the Sheets API, reusing the cached token if there is one.
creds = None
# token.pickle holds the user's access and refresh tokens; it is (re)written
# at the end of this cell whenever new credentials had to be obtained.
# NOTE(review): pickle.load can execute code embedded in the file -- only use
# a token.pickle that this notebook wrote itself.
if os.path.exists('token.pickle'):
    with open('token.pickle', 'rb') as token_file:
        creds = pickle.load(token_file)

# Anything other than a present, valid credential needs a refresh or a login.
if not (creds and creds.valid):
    if not creds or not creds.expired or not creds.refresh_token:
        # Nothing that can be refreshed: run the installed-app OAuth flow
        # against credentials.json through a local server.
        flow = InstalledAppFlow.from_client_secrets_file('credentials.json', SCOPES)
        creds = flow.run_local_server(port=0)
    else:
        # Expired, but a refresh token is available: renew it.
        creds.refresh(Request())
    # Cache the credentials for the next run.
    with open('token.pickle', 'wb') as token_file:
        pickle.dump(creds, token_file)

service = build('sheets', 'v4', credentials=creds)

In [None]:
# Fetch the rows of RANGE_NAME_READ through the Sheets API.
sheet = service.spreadsheets()
request = sheet.values().get(spreadsheetId=SPREADSHEET_ID, range=RANGE_NAME_READ)
result = request.execute()
rows = result.get('values', [])

# Build the dataframe indexed by ID without the excluded columns, drop the
# row labelled 'ID' (superfluous), then reset the index so ID becomes a
# regular column again.
records = (
    pd.DataFrame.from_records(rows, columns=columns, exclude=exclude, index='ID')
    .drop(['ID'])
    .reset_index()
)

In [None]:
# Expand each record's comma-separated KeyWords string into one
# (ID, Keyword) row per keyword.
#
# The rows are collected in a plain list and converted to a DataFrame once.
# The previous loop appended to `data`, which was never defined (the empty
# frame was bound to `keywords`), so a fresh kernel raised NameError; it also
# relied on DataFrame.append, which pandas 2.0 removed.
expanded_rows = []
for i, row in records.iterrows():
    keyword_cell = row['KeyWords']
    # presumably a record whose KeyWords cell is empty in the sheet arrives
    # here as a non-string placeholder (None/NaN) -- verify; there is nothing
    # to split in that case.
    if not hasattr(keyword_cell, 'split'):
        continue
    # The ID written out is the 1-based position of the record in `records`.
    for word in keyword_cell.split(','):
        expanded_rows.append({'ID': i + 1, 'Keyword': word})

keywords = pd.DataFrame(expanded_rows, columns=['ID', 'Keyword'])
# The following cells refer to the expanded frame as `data`.
data = keywords

In [None]:
# Clean the keywords: strip the leading and trailing whitespace left around
# each one. The column is rebuilt from its raw values, so the stripped Series
# has a fresh default index when it is assigned back to 'Keyword'.
stripped = pd.Series(data['Keyword'].values).str.strip()
data = data.assign(Keyword=stripped)

In [None]:
# Write the (ID, Keyword) pairs back to the Google sheet.
#
# The payload gets its own variable instead of rebinding `data` to a list:
# `data` stays a DataFrame, so this cell can be re-run without failing on
# a list that has no `.values`.
header = ('ID', 'Keyword')
# Header row first, then one [ID, Keyword] list per dataframe row.
values = [list(header)] + data.values.tolist()

# The rows are sent under the 'values' key of the request body.
body = {'values': values}

# NOTE(review): only the rows supplied here are written; presumably rows left
# in the target range by an earlier, longer run stay in place -- confirm
# whether the range should be cleared before updating.
result = sheet.values().update(spreadsheetId=SPREADSHEET_ID,
                               range=RANGE_NAME_WRITE,
                               valueInputOption=VALUE_INPUT_OPTION,
                               body=body).execute()

print('{0} cells updated.'.format(result.get('updatedCells')))


print('\ndone')
