# Connecting to Google Drive
Felix Zaussinger | 11.11.2020

## Core Analysis Goal(s)
1. Auto-connect to our Google Sheets documents
    - https://docs.google.com/spreadsheets/d/1kEEcKdP__1XbYKe5-nVlxzAD_P5-EQx-23IAw9SXOoc/edit#gid=370671396

2. Based on
    - https://towardsdatascience.com/how-to-access-google-sheet-data-using-the-python-api-and-convert-to-pandas-dataframe-5ec020564f0e
    - https://developers.google.com/sheets/api/quickstart/python

## Key Insight(s)
1. It works.

In [1]:
%load_ext autoreload
%autoreload 2

import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import os
import os.path
import numpy as np
import pandas as pd
import seaborn as sns
import configparser
from src.gecm import io
from pathlib import Path

# Apply one global seaborn theme so every figure in this notebook shares the same look.
sns.set_theme(
    context="talk",
    style="ticks",
    palette="Paired",
    font="sans-serif",
    font_scale=1.05,
    color_codes=True,
    rc=None,
)

#### Define paths

In [2]:
# Absolute path of the current working directory; its parent is used as the project directory.
abspath = os.path.abspath('')
project_dir = str(Path(abspath).parents[0])

# Raw and processed data folders, both located under <project_dir>/data.
data_raw, data_processed = (
    os.path.join(project_dir, "data", stage) for stage in ("raw", "processed")
)

#### Authentication

In [3]:
# Init the config file parser. Besides config.get, typed getters such as
# config.getboolean and config.getint are available.
config = configparser.ConfigParser()
fpath_cf = os.path.join(project_dir, 'config.ini')

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail loudly here instead of hitting a confusing
# NoSectionError in a later cell.
if not config.read(fpath_cf):
    raise FileNotFoundError(f"Config file not found or unreadable: {fpath_cf}")

# Show the parsed sections and their options.
io.config_describe(config)


--- default ---
credentials: 'google_api_credentials.json'
scopes: https://www.googleapis.com/auth/spreadsheets.readonly

--- gdrive_spreadsheet_ids ---
spreadsheet_id_farmers: 1kEEcKdP__1XbYKe5-nVlxzAD_P5-EQx-23IAw9SXOoc
spreadsheet_id_foresters: 1ir8WkfKpyEGkamASbd0pf9IDAE74M4Q1fsHYl4r9F34
spreadsheet_id_tourism: 1EA_mCM9Pp_URb_JavM-mMIqixTvZewSkGC2b-wDa1ns
spreadsheet_id_model: 1CVdDndGD1S8ab3D1KlXYcBcnjK8sBPl68Eth0KMgaEQ

--- gdrive_sheet_names ---
sheet_names_farmers: Farmer_1, Farmer_2
sheet_names_foresters: Forester_1, Forester_2
sheet_names_tourism: SSDA
sheet_names_parameters: model_parameters
sheet_names_calculations: model_calculations


In [4]:
# If modifying these scopes, delete the file token.pickle.
SCOPES = [config.get(section="default", option="scopes")]

# <Your spreadsheet ID>
SPREADSHEET_ID = config.get(section="gdrive_spreadsheet_ids", option="spreadsheet_id_farmers")

# <Your worksheet names>
SHEETS_STRING = config.get(section="gdrive_sheet_names", option="sheet_names_farmers")
SHEETS = io.parse_list(config_string=SHEETS_STRING)

# API credentials: take the file name from the config instead of duplicating it
# here. The config stores the value wrapped in literal quotes, which
# configparser does not remove, so strip them. The fallback keeps the previous
# hard-coded name when the option is absent.
credentials_fname = config.get(
    section="default", option="credentials", fallback="google_api_credentials.json"
).strip("'\"")
credentials_fpath = os.path.join(project_dir, credentials_fname)

#### Download

In [5]:
# Collect one numeric data frame per worksheet, keyed by worksheet name.
sheet_dict = {}

for sheet_name in SHEETS:
    print(sheet_name)

    # 1) fetch data for this worksheet
    data_dict = io.get_google_sheet(
        credentials=credentials_fpath,
        spreadsheet_id=SPREADSHEET_ID,
        range_name=sheet_name,
        scopes=SCOPES
    )

    # 2) convert to data frame and index it by the "Round" column
    # NOTE(review): header=0 / stop=11 presumably select the header row and
    # limit the rows that are read -- confirm against io.gsheet2df.
    df_raw = io.gsheet2df(data_dict, header=0, stop=11).set_index("Round")

    # 3) convert every column to numeric in a single pass; values that cannot
    #    be parsed become NaN (errors="coerce") and integer columns are
    #    downcast to the smallest integer dtype. df_raw itself is left untouched.
    df = df_raw.apply(pd.to_numeric, errors="coerce", downcast="integer")

    # 4) store under the worksheet name
    sheet_dict[sheet_name] = df

Farmer_1
Farmer_2


In [6]:
# Stack the per-sheet frames vertically; the sheet name becomes the outer index level.
df_all = pd.concat(list(sheet_dict.values()), keys=list(sheet_dict))
df_all

Unnamed: 0_level_0,Unnamed: 1_level_0,Plot,Sheep,Cattle,Native,Teamwork,Sheep,Cattle
Unnamed: 0_level_1,Round,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Farmer_1,1,13.0,20.0,0.0,-20,,,
Farmer_1,2,,0.0,0.0,0,,,
Farmer_1,3,,0.0,0.0,0,,,
Farmer_1,4,,30.0,0.0,-30,,,
Farmer_1,5,,0.0,0.0,0,,,
Farmer_1,6,,0.0,0.0,0,,,
Farmer_1,7,,0.0,0.0,0,,,
Farmer_1,8,,0.0,0.0,0,,,
Farmer_1,9,,0.0,0.0,0,,,
Farmer_1,10,,0.0,0.0,0,,,


In [7]:
# Build the flat (long) frame directly from sheet_dict so that this cell is
# idempotent: the previous version modified df_all in place and failed when
# re-run, because the index had already been reset to a single-level RangeIndex.
df_all = (
    pd.concat(list(sheet_dict.values()), keys=list(sheet_dict))
    # name the two index levels: outer = sheet name, inner = former "Round" index
    .rename_axis(index=["player", "round"])
    .reset_index()
    # round labels may arrive as strings; coerce them to a compact integer dtype
    .assign(
        round=lambda frame: pd.to_numeric(
            frame["round"], errors="coerce", downcast="integer"
        )
    )
)

In [8]:
# Check column dtypes and non-null counts of the flattened frame.
df_all.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 9 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   player    20 non-null     object 
 1   round     20 non-null     int8   
 2   Plot      2 non-null      float64
 3   Sheep     10 non-null     float64
 4   Cattle    10 non-null     float64
 5   Native    20 non-null     int8   
 6   Teamwork  0 non-null      float64
 7   Sheep     10 non-null     float64
 8   Cattle    10 non-null     float64
dtypes: float64(6), int8(2), object(1)
memory usage: 1.3+ KB


In [9]:
# Pivot to one row per round with the players spread across the columns.
# https://stackoverflow.com/questions/25386870/pandas-plotting-with-multi-index
df_final = (
    df_all
    .set_index(["round", "player"])
    .sort_index()
    .unstack(level="player")
)
df_final

Unnamed: 0_level_0,Plot,Plot,Sheep,Sheep,Cattle,Cattle,Native,Native,Teamwork,Teamwork,Sheep,Sheep,Cattle,Cattle
player,Farmer_1,Farmer_2,Farmer_1,Farmer_2,Farmer_1,Farmer_2,Farmer_1,Farmer_2,Farmer_1,Farmer_2,Farmer_1,Farmer_2,Farmer_1,Farmer_2
round,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
1,13.0,14.0,20.0,,0.0,,-20,-90,,,,50.0,,40.0
2,,,0.0,,0.0,,0,0,,,,0.0,,0.0
3,,,0.0,,0.0,,0,0,,,,0.0,,0.0
4,,,30.0,,0.0,,-30,0,,,,0.0,,0.0
5,,,0.0,,0.0,,0,0,,,,0.0,,0.0
6,,,0.0,,0.0,,0,0,,,,0.0,,0.0
7,,,0.0,,0.0,,0,0,,,,0.0,,0.0
8,,,0.0,,0.0,,0,0,,,,0.0,,0.0
9,,,0.0,,0.0,,0,0,,,,0.0,,0.0
10,,,0.0,,0.0,,0,0,,,,0.0,,0.0
