In [52]:
# Import libraries 
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
from apiclient.discovery import build #Util for API calls
import gspread # For connection to google sheets
from oauth2client.service_account import ServiceAccountCredentials
from df2gspread import df2gspread as d2g # d2g will be used once ready to upload data back to sheets

In [71]:
# Configure the connection
# OAuth scopes requested for the service account: the Sheets feed and read-only Drive access.
scopes = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive.readonly']

# Relative path to the service account JSON key file.
# A forward slash is used on purpose: it is accepted on Windows as well as POSIX systems and,
# unlike the backslash in '\s', is not an invalid escape sequence in a normal string literal.
key_file_location = 'gsheetsprivkey/serviceaccount.json'

# Load the service account credentials from the key file for the scopes above
credentials = ServiceAccountCredentials.from_json_keyfile_name(key_file_location, scopes)

# Authorise gspread to act with the private key credentials held in 'credentials'
gc = gspread.authorize(credentials)

# Function to be able to get service for google drive
def get_service(api_name, api_version, scopes=scopes, key_file_location=key_file_location):
    """Get a service that communicates to a Google API.

    The credentials are built from ``key_file_location`` and ``scopes`` on
    every call, so passing a different key file or scope list actually takes
    effect (previously both arguments were ignored in favour of the
    module-level ``credentials`` object).

    Args:
        api_name: The name of the api to connect to.
        api_version: The api version to connect to.
        scopes: A list auth scopes to authorize for the application.
            Defaults to the module-level ``scopes``.
        key_file_location: The path to a valid service account JSON key file.
            Defaults to the module-level ``key_file_location``.

    Returns:
        A service that is connected to the specified API.
    """
    # Honour the caller's key file and scopes instead of the global credentials.
    service_credentials = ServiceAccountCredentials.from_json_keyfile_name(
        key_file_location, scopes
    )

    # Build the service object.
    return build(api_name, api_version, credentials=service_credentials)

# Retrieve list of files in Google Drive
# Interested only in spreadsheets, hence the mimeType query passed to list().
# NOTE: the query does not restrict results to a particular folder; it matches every
# spreadsheet the service account can see.
drive_service = get_service("drive", "v3")

# files().list() is paginated: keep requesting pages until no nextPageToken is returned,
# otherwise any spreadsheets beyond the first page would be silently dropped.
filesInDrive = []
page_token = None
while True:
    # itemsInDrive holds the most recently fetched page of results
    itemsInDrive = drive_service.files().list(
        q="mimeType='application/vnd.google-apps.spreadsheet'",
        pageToken=page_token,
    ).execute()

    # The 'files' key has the details of the files on this page (absent -> no files)
    filesInDrive.extend(itemsInDrive.get('files', []))

    page_token = itemsInDrive.get('nextPageToken')
    if not page_token:
        break

In [117]:
# Re-key the Drive listing by file name: each name maps to a dict of that file's
# remaining fields (everything except 'name'). If two files share a name, the one
# listed last wins.
filesInDriveDict = {
    entry['name']: {field: value for field, value in entry.items() if field != 'name'}
    for entry in filesInDrive
}
        

In [130]:
# Helper function that retrieves the id from Google Drive to pass to Google Sheets API
# season naming format is EPL_YYYY_YY , E.g. EPL_2004_05
# Pass 'ALL' for all seasons

def get_fileID_from_name(seasonName,filesInDriveDict):
    """Return the Google Drive file ID for the spreadsheet named ``seasonName``.

    Args:
        seasonName: Name of the season file, e.g. ``"EPL_2004_05"``. The
            special value ``'ALL'`` (case-insensitive) is reserved for
            "all seasons".
        filesInDriveDict: Mapping of file name -> dict of file details, where
            each details dict has an ``'id'`` entry.

    Returns:
        The ``'id'`` value stored for ``seasonName``. For ``'ALL'`` the
        placeholder string ``'Will pass all files'`` is returned; it is not a
        real file ID and must not be passed on to the Sheets API.

    Raises:
        ValueError: If ``seasonName`` is not a key of ``filesInDriveDict``.
    """
    if seasonName.upper() == 'ALL':
        # Placeholder only: handling of all seasons is not implemented here.
        return 'Will pass all files'
    if seasonName in filesInDriveDict:
        return filesInDriveDict[seasonName]['id']
    # ValueError is still an Exception, so existing `except Exception` callers keep working;
    # the offending name is included to make the failure easy to diagnose.
    raise ValueError(
        "File name passed to get_fileID_from_name is invalid: {!r}".format(seasonName)
    )


In [132]:
# Look up the Drive file ID of the season's spreadsheet
spreadsheet_key = get_fileID_from_name("EPL_2013_14",filesInDriveDict)
# Open the workbook using that file ID
workbook = gc.open_by_key(spreadsheet_key)
# By default load the 1st sheet, i.e. index = 0
sheet = workbook.get_worksheet(0)
# Pull the data and transform it into a data frame
# get_all_values returns a list of lists, with the first list holding the column headers
values = sheet.get_all_values()
# Fail with a clear message instead of an IndexError on values[0] when nothing came back
if not values:
    raise ValueError("Worksheet returned no rows; cannot build a DataFrame without a header row")
# 1st row is the header, remaining rows are the actual values
pd_data = pd.DataFrame(values[1:], columns = values[0])
pd_data



Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,...,BbAv<2.5,BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,PSCH,PSCD,PSCA
0,E0,17/08/13,Arsenal,Aston Villa,1,3,A,1,1,D,...,2.37,,,,,,,1.44,5,8.05
1,E0,17/08/13,Liverpool,Stoke,1,0,H,1,0,H,...,2.02,,,,,,,1.42,4.62,10.19
2,E0,17/08/13,Norwich,Everton,2,2,D,0,0,D,...,1.82,,,,,,,3.81,3.27,2.21
3,E0,17/08/13,Sunderland,Fulham,0,1,A,0,0,D,...,1.77,,,,,,,2.52,3.23,3.16
4,E0,17/08/13,Swansea,Man United,1,4,A,0,2,A,...,1.98,,,,,,,3.62,3.41,2.22
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,E0,11/05/14,Norwich,Arsenal,0,2,A,0,0,D,...,2.36,25,0.75,1.85,1.8,2.12,2.07,4.97,4.29,1.7
376,E0,11/05/14,Southampton,Man United,1,1,D,1,0,H,...,2.12,27,0,1.9,1.86,2.06,1.99,2.77,3.92,2.47
377,E0,11/05/14,Sunderland,Swansea,1,3,A,0,2,A,...,1.94,24,-0.5,2.4,2.3,1.68,1.63,2.19,3.6,3.5
378,E0,11/05/14,Tottenham,Aston Villa,3,0,H,3,0,H,...,2.36,29,-1,1.86,1.77,2.2,2.1,1.47,4.75,7.64


In [133]:
# Function that takes a list of file names and concatenates their dataframes

