## Calculate Travel Time Correlation Matrix Between Modes
Author: Callie Clark
Last Updated: 6/8/2024

In [76]:
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import numpy as np
import networkx as nx
from sklearn.metrics import r2_score 
from shapely import wkt

%matplotlib inline
from TT_functions import *
import random
import json

In [77]:

# df_euclidean=pd.read_csv('processed_df/df_tt_S2.csv',index_col=0)
# df_transit=pd.read_csv('processed_df/df_tt_transit_k25.csv',index_col=0)
# df_walk=pd.read_csv('processed_df/df_tt_walk_k10.csv',index_col=0)
# df_drive=pd.read_csv('processed_df/df_tt_drive_k10.csv',index_col=0)
# df_bike=pd.read_csv('processed_df/df_tt_bike_k10.csv',index_col=0)
# df_drive_no_model=pd.read_csv('transit_CT_TT_allvars_all_cutoff=20.csv',index_col=0)
# df_EFAI=pd.read_csv('processed_df/df_tt_EFAI.csv',index_col=0)

# Load one travel-time table per mode from travel_time_df/.
# Each CSV's first column is used as the frame index (index_col=0).
(
    df_euclidean,
    df_transit,
    df_walk,
    df_drive,
    df_bike,
    df_drive_no_model,
) = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        'travel_time_df/df_tt_S2.csv',
        'travel_time_df/df_tt_transit_k25.csv',
        'travel_time_df/df_tt_walk_k10.csv',
        'travel_time_df/df_tt_drive_k10.csv',
        'travel_time_df/df_tt_bike_k10.csv',
        'travel_time_df/df_tt_drive_k10_no_model.csv',
    )
)
#df_EFAI=pd.read_csv('travel_time_df/df_tt_EFAI.csv',index_col=0)


In [78]:
# Column-name prefixes "<day>_period_<n>": seven day codes x periods 1-4,
# ordered day-major (all four periods of Monday first, then Tuesday, ...).
col_list = [
    day_code + '_period_' + str(period_no)
    for day_code in ('Mo', 'Tu', 'We', 'Th', 'Fr', 'Sa', 'Su')
    for period_no in (1, 2, 3, 4)
]

In [79]:
def unstack_tts_by_nearest(df,mode,cols=None):
    """Reshape a wide travel-time table into one long ``tt_<mode>`` column.

    For every prefix ``p`` in ``cols`` the columns ``p + "_nearest"`` and
    ``p + "_tt"`` are read from ``df``. Each row becomes one output row whose
    index label is ``"<GEOID>_<nearest>_<p>"`` and whose value is the
    ``p + "_tt"`` entry. The per-prefix pieces are stacked vertically in the
    order given by ``cols``.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table. ``GEOID`` must be available as a column after
        ``reset_index()`` (i.e. it is either the index name or already a
        column). ``df`` itself is NOT modified.
    mode : str
        Suffix for the output column name (``'tt_' + mode``).
    cols : iterable of str, optional
        Column prefixes to unstack. Defaults to the module-level ``col_list``.

    Returns
    -------
    pandas.DataFrame
        Single column ``'tt_' + mode`` indexed by the string labels. With an
        empty ``cols`` an empty frame carrying that column is returned.

    Raises
    ------
    KeyError
        If ``GEOID`` or any required ``*_nearest`` / ``*_tt`` column is missing.
    """
    prefixes = col_list if cols is None else list(cols)
    value_name = 'tt_' + mode

    if not prefixes:
        return pd.DataFrame(columns=[value_name])

    # Work on a copy so the caller's frame keeps its index and columns;
    # this makes the function safe to call repeatedly on the same input.
    flat = df.reset_index()
    geoid = flat["GEOID"].astype(str)

    pieces = []
    for prefix in prefixes:
        labels = geoid + "_" + flat[prefix + "_nearest"].astype(str) + '_' + prefix
        # rename() returns a new frame, so replacing its index leaves `flat` intact.
        piece = flat[[prefix + '_tt']].rename(columns={prefix + '_tt': value_name})
        piece.index = pd.Index(labels, name=prefix + "_label")
        pieces.append(piece)

    # Concatenate once instead of growing the frame inside the loop.
    return pd.concat(pieces)
        



In [80]:
# Convert every wide per-mode table to its long single-column form
# (tt_<mode>), processing the modes in this fixed order.
(
    df_drive_long,
    df_transit_long,
    df_walk_long,
    df_bike_long,
    df_euclidean_long,
    df_drive_no_model_long,
) = (
    unstack_tts_by_nearest(wide_table, mode=mode_name)
    for wide_table, mode_name in (
        (df_drive, 'drive'),
        (df_transit, 'transit'),
        (df_walk, 'walk'),
        (df_bike, 'bike'),
        (df_euclidean, 'euclidean'),
        (df_drive_no_model, 'drive_no_model'),
    )
)


In [81]:
# Align all modes on the shared "<GEOID>_<nearest>_<period>" index label.
# The inner joins keep only labels present in every mode's table, so a row
# survives only when the label (including the nearest id) matches across modes.
# Column names are already unique (tt_drive, tt_bike, ...), so no suffixes or
# renames are needed; an accidental name clash would raise instead of being
# silently suffixed.
df_comb_5 = (
    df_drive_long
    .join(df_bike_long, how='inner')
    .join(df_euclidean_long, how='inner')
    .join(df_transit_long, how='inner')
    .join(df_walk_long, how='inner')
    .join(df_drive_no_model_long, how='inner')
    .dropna(how='any')  # correlate only rows with a travel time for every mode
)

# Pairwise Pearson correlation between the per-mode travel times.
df_comb_5.corr(method='pearson').round(2)

Unnamed: 0,tt_drive,tt_bike,tt_euclidean,tt_transit,tt_walk,tt_drive_no_model
tt_drive,1.0,0.86,0.86,0.84,0.85,0.94
tt_bike,0.86,1.0,0.98,0.9,0.98,0.96
tt_euclidean,0.86,0.98,1.0,0.88,0.99,0.95
tt_transit,0.84,0.9,0.88,1.0,0.88,0.9
tt_walk,0.85,0.98,0.99,0.88,1.0,0.95
tt_drive_no_model,0.94,0.96,0.95,0.9,0.95,1.0


In [82]:
# Show the Pearson correlation table with rows and columns in one shared order.
# To export it, append .to_csv('outputs/correlation_values.csv') to the last expression.
mode_order=['tt_euclidean','tt_walk','tt_bike','tt_transit','tt_drive','tt_drive_no_model']
df_comb_5.corr(method='pearson').round(2).loc[mode_order,mode_order]

Unnamed: 0,tt_euclidean,tt_walk,tt_bike,tt_transit,tt_drive,tt_drive_no_model
tt_euclidean,1.0,0.99,0.98,0.88,0.86,0.95
tt_walk,0.99,1.0,0.98,0.88,0.85,0.95
tt_bike,0.98,0.98,1.0,0.9,0.86,0.96
tt_transit,0.88,0.88,0.9,1.0,0.84,0.9
tt_drive,0.86,0.85,0.86,0.84,1.0,0.94
tt_drive_no_model,0.95,0.95,0.96,0.9,0.94,1.0
