In [31]:
import numpy as np
import pandas as pd
import seaborn as sns

from authenticator import Authenticator
from sheetmanager import SheetManager

In [32]:
# --- Google API access ---
# Path handed to Authenticator when building credentials.
keys = "credentials.json"
# Scope list passed to get_creds; read-only Drive access.
SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]

# --- Source spreadsheet and the ranges read from it ---
spreadsheetId = "1QbkOSgaHt75LeQY2a9GeUjHajXIHWv3Z-ID7svUPG6w"
# DAV figures: only a sheet name, no cell range (presumably reads the whole sheet — confirm).
dav_data_range = "JP"
# CCE (casual / core / emerging) split by region.
cce_data_range = "CCE Split - JP by region!A2:D50"
# Population and internet penetration by prefecture.
pop_data_range = "Population by Prefecture!O5:Q52"

In [33]:
# Build the sheet client used by the loading cells below: credentials are
# obtained for SCOPES from the file named by `keys`, then wrapped in a SheetManager.
# NOTE(review): Authenticator and SheetManager are project-local helpers whose
# internals are not visible here — presumably get_creds runs the Google auth flow; confirm.
authenticator = Authenticator(keys)
creds = authenticator.get_creds(SCOPES)
manager = SheetManager(creds)

In [40]:
# Load the DAV figures, drop the placeholder 'null' region, parse the date
# column and index the frame by region_name.
#
# The filter, the date conversion and the re-indexing are done in one chain so
# that the date column is written onto a fresh frame. Assigning
# `dav_df['date'] = ...` directly on a boolean-filtered frame triggers pandas'
# SettingWithCopyWarning and leaves it ambiguous whether a copy or a view is
# being modified.
dav_df = (
    manager.get_values(sheetId=spreadsheetId, data_range=dav_data_range)
    # rows whose region is the literal string 'null' are excluded
    .loc[lambda frame: frame['region_name'] != 'null']
    # dates arrive as compact YYYYMMDD values, e.g. 20200131
    .assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y%m%d'))
    .set_index('region_name')
)

In [41]:
# Load the CCE split per region, discard the placeholder 'null' region and
# index the result by region_name.
cce_df = (
    manager.get_values(
        sheetId=spreadsheetId,
        data_range=cce_data_range,
        as_df=True,
    )
    .loc[lambda frame: frame['region_name'] != 'null']
    .set_index('region_name')
)

In [42]:
# Load population and internet penetration per prefecture, indexed by region_name.
# 'General Populations' is multiplied by 1000 after loading
# (presumably the sheet stores it in thousands of people — confirm against the sheet).
pop_df = (
    manager.get_values(sheetId=spreadsheetId, data_range=pop_data_range)
    .set_index('region_name')
    .assign(**{
        'General Populations': lambda frame: frame['General Populations'] * 1000,
    })
)

In [44]:
# Preview the first rows of the population table to sanity-check the load and scaling.
pop_df.head()

Unnamed: 0_level_0,General Populations,Internet Penetration (%)
region_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Hokkaido Prefecture,5320000,76.1
Aomori Prefecture,1278000,70.9
Iwate Prefecture,1255000,69.4
Miyagi Prefecture,2323000,77.4
Akita Prefecture,996000,67.1


In [46]:
# Average each DAV metric per region across all dates in dav_df.
# The aggregation is named with the string 'mean' rather than the np.mean
# callable: pandas 2.1+ emits a FutureWarning for numpy callables here, and the
# string form gives the same result.
table = dav_df.pivot_table(
    values=['unique_logged_in_dav', 'unique_visitor_dav', 'total'],
    index='region_name',
    aggfunc='mean',
)



In [59]:
# Combine the per-region DAV averages with the CCE split and the population
# data. All three frames are indexed by region_name; outer joins keep regions
# that are missing from any one source (their absent values become NaN).
column_labels = {
    'total': 'Average Total DAV',
    'unique_logged_in_dav': 'Average Unique Logged in DAV',
    'unique_visitor_dav': 'Average Unique Visitor DAV',
    'Casual': 'Casual Users Proportion',
    'Core': 'Core Users Proportion',
    'Emerging': 'Emerging Users Proportion',
}

df = (
    table
    .merge(cce_df, how='outer', left_index=True, right_index=True)
    .merge(pop_df, how='outer', left_index=True, right_index=True)
    .rename(columns=column_labels)
)

# Derived metrics. Internet penetration is a percentage, hence the division by 100.
df['Internet Population'] = df['General Populations'] * df['Internet Penetration (%)'] / 100
# Average total DAV as a share of the internet population and of the whole population.
df['YT penetration (of internet)'] = df['Average Total DAV'] / df['Internet Population']
df['YT penetration (of population)'] = df['Average Total DAV'] / df['General Populations']

In [60]:
# Preview the merged per-region table, including the derived penetration columns.
df.head()

Unnamed: 0_level_0,Average Total DAV,Average Unique Logged in DAV,Average Unique Visitor DAV,Casual Users Proportion,Core Users Proportion,Emerging Users Proportion,General Populations,Internet Penetration (%),Internet Population,YT penetration (of internet),YT penetration (of population)
region_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Aichi Prefecture,3213150.0,1929695.0,1283456.0,0.451036,0.249935,0.29903,7525000,82.5,6208125.0,0.517572,0.426997
Akita Prefecture,224450.8,134652.3,89798.5,0.413382,0.294916,0.291702,996000,67.1,668316.0,0.335845,0.225352
Aomori Prefecture,308340.2,187870.5,120469.7,0.40196,0.307982,0.290058,1278000,70.9,906102.0,0.340293,0.241268
Chiba Prefecture,1935724.0,1168856.0,766867.8,0.442371,0.263618,0.294011,6246000,79.9,4990554.0,0.387878,0.309914
Ehime Prefecture,368960.5,218740.8,150219.7,0.420804,0.288631,0.290564,1364000,73.9,1007996.0,0.366034,0.270499
