# Planning Districts - Main Mode to Each School

In [2]:
import pandas as pd

# Read the SMTO 2015 input file and keep every row whose 'Level' is not 'Other'.
df = (
    pd.read_csv('../Data/SMTO_2015/SMTO_2015_Complete_Input.csv')
    .loc[lambda frame: frame['Level'] != 'Other']
)
df.head()

Unnamed: 0,Campus,Level,Status,Mode_Actual,Gender,Licence,Work,Age,HomeZone,Family,...,Domestic.OC,Admission_Avg.SG,Admission_Avg.SC,Admission_Avg.MI,Admission_Avg.YK,Admission_Avg.YG,Admission_Avg.RY,Admission_Avg.OC,Exp_Segment,Exp_Level
0,Scarborough (UTSC),UG,FT,Transit Bus,Female,0,Unknown,20,261,1,...,0.8998,0.893,0.841,0.83,0.817,0.817,0.84,0.824,0.944738,0.944738
1,Downtown Toronto (St. George),Grad,FT,Walk,Female,1,Unknown,25,71,0,...,0.6786,0.893,0.841,0.83,0.817,0.817,0.84,0.824,0.986085,0.986085
2,Downtown Toronto (St. George),UG,FT,Transit Bus,Female,1,Unknown,23,3714,1,...,0.8998,0.893,0.841,0.83,0.817,0.817,0.84,0.824,0.91927,0.91927
3,Downtown Toronto (St. George),UG,FT,Walk,Male,1,Unknown,20,74,0,...,0.8998,0.893,0.841,0.83,0.817,0.817,0.84,0.824,0.91927,0.91927
4,Downtown Toronto (St. George),Grad,FT,Walk,Male,1,Unknown,27,71,0,...,0.6786,0.893,0.841,0.83,0.817,0.817,0.84,0.824,0.986085,0.986085


Now, let us load coordinate and planning district information for each zone. To avoid scaling issues, we min-max normalize the coordinates so that the values are between 0 and 1, inclusive. We also compute the correlation matrix of the zone attributes.

In [3]:
# Load the zone table and index it by zone number so that rows can be
# looked up directly from each respondent's 'HomeZone'.
zones = pd.read_csv('../Data/Zones.csv').set_index('Zone#')

# Min-max normalize both coordinates so that X and Y lie in [0, 1].
coords = zones[['X', 'Y']]
zones[['X', 'Y']] = (coords - coords.min()) / (coords.max() - coords.min())

# Look up PD, X and Y for every respondent's home zone in one label-based
# selection (instead of three scalar lookups per row), then re-label the
# result with df's index so the two frames line up row for row.
home_zone_info = zones.loc[df['HomeZone'].values, ['PD', 'X', 'Y']]
home_zone_info.index = df.index

# Drop any PD/X/Y columns left over from a previous run of this cell before
# concatenating; otherwise re-running would append duplicate columns.
df = pd.concat(
    (df.drop(columns=['PD', 'X', 'Y'], errors='ignore'), home_zone_info),
    axis=1,
)

# Last expression of the cell, so the correlation matrix is actually displayed.
zones.corr()

In [5]:
def most_common(values):
    """Return the most frequent entry of a Series (first label of value_counts)."""
    return values.value_counts().index[0]


# Most common 'Mode' for every (planning district, school code) pair.
double_df = df.groupby(['PD', 'School_Codes'])['Mode'].agg(most_common)
double_df

PD  School_Codes
0   SG              Transit
    YK              Transit
1   MI               Active
    OC               Active
    RY               Active
    SC              Transit
    SG               Active
    YG              Transit
    YK              Transit
2   MI              Transit
    OC               Active
    RY              Transit
    SC              Transit
    SG               Active
    YG              Transit
    YK              Transit
3   MI              Transit
    OC              Transit
    RY              Transit
    SC              Transit
    SG              Transit
    YG              Transit
    YK              Transit
4   MI              Transit
    OC              Transit
    RY              Transit
    SC              Transit
    SG              Transit
    YG               Active
    YK              Transit
                     ...   
39  OC              Transit
    RY              Transit
    SG              Transit
    YG                 Auto
   

In [20]:
# Mode shares for one example pair: planning district 0, school code 'SG'.
# multi_df is built here so that this cell does not rely on a definition
# that only appears further down the notebook; on a fresh top-to-bottom
# run the lookup would otherwise raise NameError.
multi_df = (
    df.groupby(['PD', 'School_Codes'])['Mode']
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)
)
multi_df.loc[0].loc['SG']

Mode
Active     0.000000
Auto       0.333333
Transit    0.666667
Name: SG, dtype: float64

In [13]:
# Share of each mode within every (planning district, school code) pair.
# Modes that never occur for a pair come out of unstack() as NaN and are
# replaced by 0.
multi_df = (
    df.groupby(['PD', 'School_Codes'])['Mode']
    .value_counts(normalize=True)
    .unstack()
    .fillna(0)
)
multi_df

Unnamed: 0_level_0,Mode,Active,Auto,Transit
PD,School_Codes,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,SG,0.000000,0.333333,0.666667
0,YK,0.000000,0.250000,0.750000
1,MI,0.500000,0.277778,0.222222
1,OC,0.800000,0.008000,0.192000
1,RY,0.843750,0.006250,0.150000
1,SC,0.083333,0.145833,0.770833
1,SG,0.874664,0.008505,0.116831
1,YG,0.000000,0.090909,0.909091
1,YK,0.021429,0.071429,0.907143
2,MI,0.200000,0.333333,0.466667


In [6]:
# Pivot the school codes (innermost index level) into columns:
# one row per planning district, one column per school code.
PD_df = double_df.unstack(level='School_Codes')
PD_df

School_Codes,MI,OC,RY,SC,SG,YG,YK
PD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,,,,,Transit,,Transit
1,Active,Active,Active,Transit,Active,Transit,Transit
2,Transit,Active,Transit,Transit,Active,Transit,Transit
3,Transit,Transit,Transit,Transit,Transit,Transit,Transit
4,Transit,Transit,Transit,Transit,Transit,Active,Transit
5,Transit,Transit,Transit,Transit,Transit,Transit,Transit
6,Transit,Transit,Transit,Transit,Transit,Transit,Transit
7,Transit,Transit,Transit,Transit,Transit,Transit,Transit
8,Transit,Transit,Transit,Transit,Transit,Transit,Transit
9,Transit,Transit,Transit,Transit,Transit,Transit,Transit


In [148]:
# Number of rows of df in each planning district, computed in a single pass
# and aligned to PD_df's index (districts with no rows get 0). This replaces
# a loop that re-filtered the whole frame once per district, and removes the
# placeholder None column that was overwritten immediately afterwards.
pd_counts = df['PD'].value_counts()
PD_df['n'] = pd_counts.reindex(PD_df.index, fill_value=0)

In [149]:
# Display the table of most common modes together with the sample-size column 'n'.
PD_df

School_Codes,MI,OC,RY,SC,SG,YG,YK,PD,n
PD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,,,,,Transit,,Transit,0,7
1,Active,Active,Active,Transit,Active,Transit,Transit,1,3056
2,Transit,Active,Transit,Transit,Active,Transit,Transit,2,957
3,Transit,Transit,Transit,Transit,Transit,Transit,Transit,3,610
4,Transit,Transit,Transit,Transit,Transit,Active,Transit,4,582
5,Transit,Transit,Transit,Transit,Transit,Transit,Transit,5,259
6,Transit,Transit,Transit,Transit,Transit,Transit,Transit,6,510
7,Transit,Transit,Transit,Transit,Transit,Transit,Transit,7,109
8,Transit,Transit,Transit,Transit,Transit,Transit,Transit,8,356
9,Transit,Transit,Transit,Transit,Transit,Transit,Transit,9,144


In [152]:
# The saved output shows a 'PD' column in PD_df, but no cell in this notebook
# creates it (it presumably came from earlier kernel state). Drop it only if
# it exists so a fresh run does not fail with KeyError.
PD_df = PD_df.drop(columns='PD', errors='ignore')

In [153]:
# Write the per-district table (index included) to a CSV file in the working directory.
PD_df.to_csv(path_or_buf='PD_Modes.csv')