# Planning Districts - Main Mode to Each School

In [9]:
import pandas as pd

# Read the SMTO 2015 survey input and keep every row whose 'Level' is not 'Other'.
df = (
    pd.read_csv('../Data/SMTO_2015/SMTO_2015_Complete_Input.csv')
    .loc[lambda frame: frame['Level'] != 'Other']
)
df.head()

Unnamed: 0,Campus,Level,Status,Mode_Actual,Gender,Licence,Work,Age,HomeZone,Family,...,Domestic.OC,Admission_Avg.SG,Admission_Avg.SC,Admission_Avg.MI,Admission_Avg.YK,Admission_Avg.YG,Admission_Avg.RY,Admission_Avg.OC,Exp_Segment,Exp_Level
0,Scarborough (UTSC),UG,FT,Transit Bus,Female,0,Unknown,20,261,1,...,0.8998,0.893,0.841,0.83,0.817,0.817,0.84,0.824,0.944738,0.944738
1,Downtown Toronto (St. George),Grad,FT,Walk,Female,1,Unknown,25,71,0,...,0.6786,0.893,0.841,0.83,0.817,0.817,0.84,0.824,0.986085,0.986085
2,Downtown Toronto (St. George),UG,FT,Transit Bus,Female,1,Unknown,23,3714,1,...,0.8998,0.893,0.841,0.83,0.817,0.817,0.84,0.824,0.91927,0.91927
3,Downtown Toronto (St. George),UG,FT,Walk,Male,1,Unknown,20,74,0,...,0.8998,0.893,0.841,0.83,0.817,0.817,0.84,0.824,0.91927,0.91927
4,Downtown Toronto (St. George),Grad,FT,Walk,Male,1,Unknown,27,71,0,...,0.6786,0.893,0.841,0.83,0.817,0.817,0.84,0.824,0.986085,0.986085


Now, let us load the coordinate and planning district (PD) information for each zone. To avoid scaling issues, we min-max normalize the coordinates so that the values lie between 0 and 1, inclusive. We also compute the correlations between the zone columns.

In [11]:
# Add zone information to df.
# Index the zone table by zone number so survey 'HomeZone' values can be used as row labels.
zones = pd.read_csv('../Data/Zones.csv').set_index('Zone#')

# Min-max normalize each coordinate so its values lie in [0, 1].
for coord in ('X', 'Y'):
    coord_min, coord_max = zones[coord].min(), zones[coord].max()
    zones[coord] = (zones[coord] - coord_min) / (coord_max - coord_min)

# Look up PD, X and Y for every respondent's home zone with a single label-based
# selection (instead of one Python-level lookup per row), then re-label the rows
# with df's index so the concat below aligns row-for-row.
temp = zones.loc[df['HomeZone'].tolist(), ['PD', 'X', 'Y']]
temp.index = df.index

# Drop PD/X/Y left over from a previous execution of this cell, so that re-running
# it does not append duplicate columns to df.
df = pd.concat((df.drop(columns=['PD', 'X', 'Y'], errors='ignore'), temp), axis=1)

# Kept as the last expression of the cell: a notebook only displays the final
# expression, so a mid-cell call would compute the correlations and discard them.
zones.corr()

In [125]:
# For each (planning district, school code) pair, reduce every column to its most
# frequent value: value_counts() orders by descending count, so index[0] is the mode.
double_df = (
    df
    .groupby(['PD', 'School_Codes'])
    .agg(lambda column: column.value_counts().index[0])
)
double_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Campus,Level,Status,Mode_Actual,Gender,Licence,Work,Age,HomeZone,Family,...,Admission_Avg.SC,Admission_Avg.MI,Admission_Avg.YK,Admission_Avg.YG,Admission_Avg.RY,Admission_Avg.OC,Exp_Segment,Exp_Level,X,Y
PD,School_Codes,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,SG,Downtown Toronto (St. George),UG,FT,GO Train,Female,1,Unknown,43,8501,0,...,0.841,0.83,0.817,0.817,0.84,0.824,0.91927,0.91927,0.540185,0.857544
0,YK,Keele,Grad,PT,GO Bus,Female,1,Unknown,30,8033,0,...,0.841,0.83,0.817,0.817,0.84,0.824,0.977998,0.977998,0.385145,0.479116
1,MI,Mississauga (UTM),UG,FT,Walk,Male,1,Unknown,22,68,0,...,0.841,0.83,0.817,0.817,0.84,0.824,1.26895,1.26895,0.43053,0.439445
1,OC,OCADu,UG,FT,Walk,Female,0,Unknown,20,68,0,...,0.841,0.83,0.817,0.817,0.84,0.824,0.679794,0.679794,0.43053,0.439445
1,RY,RyersonU,UG,FT,Walk,Female,1,Unknown,21,38,0,...,0.841,0.83,0.817,0.817,0.84,0.824,0.895321,0.895321,0.434719,0.440647


In [126]:
# Pivot the per-(PD, school) most common 'Mode' into a table with one row per
# planning district and one column per school code (the innermost index level).
# NOTE(review): 'Mode' is not among the columns visible in the truncated outputs;
# presumably it is one of the elided input columns - confirm.
PD_df = double_df.loc[:, 'Mode'].unstack(level=-1)
PD_df

School_Codes,MI,OC,RY,SC,SG,YG,YK
PD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,,,,,Transit,,Transit
1,Active,Active,Active,Transit,Active,Transit,Transit
2,Transit,Active,Transit,Transit,Active,Transit,Transit
3,Transit,Transit,Transit,Transit,Transit,Transit,Transit
4,Transit,Transit,Transit,Transit,Transit,Active,Transit
5,Transit,Transit,Transit,Transit,Transit,Transit,Transit
6,Transit,Transit,Transit,Transit,Transit,Transit,Transit
7,Transit,Transit,Transit,Transit,Transit,Transit,Transit
8,Transit,Transit,Transit,Transit,Transit,Transit,Transit
9,Transit,Transit,Transit,Transit,Transit,Transit,Transit


In [148]:
# Number of survey rows in each planning district, listed in the same order as
# PD_df's rows. The count is per district only (not per school).
# One value_counts() pass replaces re-filtering df once per district; a district
# with no matching rows gets 0, the same as len() of an empty selection.
sample_sizes = df['PD'].value_counts().reindex(PD_df.index, fill_value=0).tolist()

PD_df['n'] = sample_sizes

In [149]:
# Display the mode-per-school table, now including the sample-size column 'n'.
PD_df

School_Codes,MI,OC,RY,SC,SG,YG,YK,PD,n
PD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,,,,,Transit,,Transit,0,7
1,Active,Active,Active,Transit,Active,Transit,Transit,1,3056
2,Transit,Active,Transit,Transit,Active,Transit,Transit,2,957
3,Transit,Transit,Transit,Transit,Transit,Transit,Transit,3,610
4,Transit,Transit,Transit,Transit,Transit,Active,Transit,4,582
5,Transit,Transit,Transit,Transit,Transit,Transit,Transit,5,259
6,Transit,Transit,Transit,Transit,Transit,Transit,Transit,6,510
7,Transit,Transit,Transit,Transit,Transit,Transit,Transit,7,109
8,Transit,Transit,Transit,Transit,Transit,Transit,Transit,8,356
9,Transit,Transit,Transit,Transit,Transit,Transit,Transit,9,144


In [152]:
# Remove a stray 'PD' column if one is present. No cell in this notebook adds such
# a column (PD is already the index), so an unconditional `del` raises KeyError
# when the notebook is run top to bottom on a fresh kernel.
if 'PD' in PD_df.columns:
    del PD_df['PD']

In [153]:
# Write the table to 'PD_Modes.csv' in the current working directory
# (the PD index is written as the first column).
PD_df.to_csv('PD_Modes.csv')