In [58]:
import pandas as pd
from bokeh.charts import gridplot, output_file, Chord
from bokeh.io import show
from bokeh.sampledata.les_mis import data
import numpy as np
from gmplot import gmplot

In [2]:
# Load the dwelling-type table, indexed by the second CSV column. The index is
# lower-cased at the end of this cell; chord_df joins subzone names against it.
dwelling = pd.read_csv("type of dwelling.csv", index_col=1)

# Forward-fill blanks down each column FIRST, then turn the literal '-'
# placeholders into NaN. The order matters: doing it the other way round would
# forward-fill over the '-' cells as well.
dwelling = dwelling.ffill().replace(to_replace='-', value=np.nan)

# Rows with no value in any of the three housing categories get a Total of 0.
housing_cols = ['Total HDB', 'Condominiums and Other Apartments', 'Landed \rProperties']
no_housing = dwelling[housing_cols].isnull().all(axis=1)
dwelling.loc[no_housing, 'Total'] = 0.00

# Assign the filled column back instead of calling fillna(inplace=True) on a
# column selection, which is not guaranteed to write through to the frame.
dwelling['Total HDB'] = dwelling['Total HDB'].fillna(0.0)

# Share of non-HDB dwellings. Rows whose Total is 0 evaluate to 0/0 and
# therefore end up with NaN here.
dwelling['private_prop'] = 1 - (
    pd.to_numeric(dwelling['Total HDB']) / pd.to_numeric(dwelling['Total'])
)

dwelling.index = dwelling.index.str.lower()

In [65]:
def get_group_prop(group_val, quantile=0.9):
    """Turn a group's values into shares of the group total and keep only the largest.

    Parameters
    ----------
    group_val : pandas.Series
        Numeric values belonging to one group.
    quantile : float, default 0.9
        Quantile of the computed shares used as the cut-off. Shares strictly
        below that quantile are replaced with 0.0.

    Returns
    -------
    pandas.Series
        A new Series with the same index as ``group_val``; the input is not
        modified. If the group sums to zero the shares are NaN and are left
        as NaN.
    """
    group_prop = group_val / group_val.sum()
    threshold = group_prop.quantile(quantile)
    # mask() only touches entries where the comparison is True, so NaN shares
    # (for which `<` is False) are preserved rather than zeroed.
    return group_prop.mask(group_prop < threshold, 0.0)

In [51]:
def chord_df(df):
    """Aggregate subzone-to-subzone values into a planning-area table for a Chord plot.

    Relies on the module-level ``dwelling`` frame (indexed by lower-cased
    name, with 'Planning Area', 'Total' and 'private_prop' columns) and on
    ``get_group_prop``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have exactly three columns, in this order: ``pop_cell_subzone``,
        ``other_cell_subzone`` and a numeric value column. The columns are
        renamed positionally after the two lookups below.

    Returns
    -------
    pandas.DataFrame
        One row per (source_planning, target_planning) pair with columns
        ``source_planning``, ``target_planning``, ``value`` (mean of the
        subzone-level values), ``Planning Area``, ``private_prop`` and
        ``value_prop`` (the pair's share of its source's total after
        ``get_group_prop``, multiplied by 1000 and truncated to int).
        Self-loops, rows with any missing value, and rows whose target never
        appears as a source are removed. Rows are sorted by ``private_prop``
        then ``source_planning``.
    """
    # Attach planning area and total to the source subzone, then to the target
    # subzone. The second merge produces suffixed duplicate column names, hence
    # the positional rename.
    area_lookup = dwelling[['Planning Area', 'Total']]
    telco = df.merge(area_lookup, how='left', left_on='pop_cell_subzone', right_index=True)
    telco = telco.merge(area_lookup, how='left', left_on='other_cell_subzone', right_index=True)
    telco.columns = ['source', 'target', 'value', 'source_planning', 'source_pop',
                     'target_planning', 'target_pop']
    telco = telco.dropna()

    combined = (
        telco.groupby(['source_planning', 'target_planning'])['value']
        .mean()
        .reset_index()
    )

    # Rows of `dwelling` indexed 'total' carry the per-planning-area private_prop.
    area_private = dwelling.loc[dwelling.index == 'total', ['Planning Area', 'private_prop']]
    combined = combined.merge(area_private, how='left',
                              left_on='source_planning', right_on='Planning Area')

    # Share of each target within its source group. transform() returns a
    # Series aligned to `combined`, so no concat/re-merge is needed and the
    # result has the same shape even when there is only one source group.
    # The shares are computed before self-loops are removed, so self-loops
    # still count towards each source's total and quantile.
    combined['value_prop'] = (
        combined.groupby('source_planning')['value'].transform(get_group_prop)
    )

    combined = (
        combined[combined['source_planning'] != combined['target_planning']]  # no self-references
        # Drop incomplete rows BEFORE the integer cast: astype(int) raises on NaN.
        .dropna()
        .assign(value_prop=lambda frame: (frame['value_prop'] * 1000).astype(int))
        .sort_values(['private_prop', 'source_planning'])
    )

    # Keep only targets that also appear as a source in the remaining rows.
    return combined[combined['target_planning'].isin(combined['source_planning'].unique())]

In [66]:
def _subzone_chord(csv_path, title):
    """Read one subzone CSV, aggregate it with chord_df and wrap it in a Chord chart.

    ``pd.read_csv`` replaces the deprecated ``pd.DataFrame.from_csv``. The
    first CSV column is still consumed as the index, which chord_df does not
    use as a join or grouping key.
    """
    flows = chord_df(pd.read_csv(csv_path, index_col=0))
    return Chord(flows, source="source_planning", target="target_planning",
                 value="value_prop", title=title)


morn_weekend = _subzone_chord("df_7am_to_10am_manhoursby_subzone_weekend.csv", "7am - 10am (Weekend)")
morn_weekday = _subzone_chord("df_7am_to_10am_manhoursby_subzone_weekday.csv", "7am - 10am (Weekday)")
aftn_weekend = _subzone_chord("df_10am_to_5pm_manhoursby_subzone_weekend.csv", "10am - 5pm (Weekend)")
aftn_weekday = _subzone_chord("df_10am_to_5pm_manhoursby_subzone_weekday.csv", "10am - 5pm (Weekday)")

# 2x2 layout: mornings on the top row, afternoons below; weekend left, weekday right.
p = gridplot([[morn_weekend, morn_weekday], [aftn_weekend, aftn_weekday]])

In [67]:
# Direct bokeh output to a standalone HTML file (mode="inline"), then render
# the grid of chord diagrams held in `p`.
output_file(filename='chord-diagram-bokeh.html', mode="inline")
show(p)

INFO:bokeh.core.state:Session output file 'chord-diagram-bokeh.html' already exists, will be overwritten.


In [68]:
# Create a Google Maps plotter from the place name "Singapore".
# NOTE(review): from_geocode presumably resolves the name to coordinates via a
# geocoding lookup (network access, possibly an API key) — confirm against the
# installed gmplot version.
gmap = gmplot.GoogleMapPlotter.from_geocode("Singapore")

In [69]:
# Write the map to my_map.html. No markers or layers are added to `gmap` in the
# cells shown here, so presumably only the base map is rendered.
gmap.draw("my_map.html")