## Modelling neighbourhood change in travel behaviour as a function of social and transport (dis)advantage

In [2]:
%matplotlib inline

import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pysal as ps
import geopandas as gpd

# Apply seaborn's "whitegrid" style to all plots drawn later in the notebook.
sns.set(style="whitegrid")

Joining the data to CT boundaries, since the models will account for spatial effects

In [3]:
# Read the CT boundary shapefile and the table of per-tract betas, then attach
# the betas to the boundaries by matching `CTUID` (shapefile) to `ctuid` (CSV).
ct_boundaries = gpd.read_file("spatial_data/ct/CT_2016_gtha_utm17.shp")
df = pd.read_csv("data/var_betas.csv")
ct = pd.merge(ct_boundaries, df, left_on="CTUID", right_on="ctuid")

# Rescaled copy of the population beta (divided by 1000), used as a regressor below.
ct["pop_beta_1000s"] = ct["pop_beta"].div(1000)
ct

Unnamed: 0,CTUID,CTNAME,PRUID,PRNAME,CMAUID,CMAPUID,CMANAME,CMATYPE,old_tor,geometry,...,pop_unemployed_beta,inc_lico_p_beta,inc_total30p_beta,dw_majorrep_beta,dw_lone_prop_beta,dw_no_car_beta,mean_nonworktrip_time_adult_beta,mean_commute_time_adult_beta,R_activities_per_day_beta,pop_beta_1000s
0,5350442.06,0442.06,35,Ontario,535,35535,Toronto,B,0,"POLYGON ((624814.002 4873636.002, 624758.000 4...",...,0.000164,0.001000,0.003787,-0.001965,0.000668,-0.000330,0.167381,0.673006,-0.017467,0.444872
1,5350007.01,0007.01,35,Ontario,535,35535,Toronto,B,1,"POLYGON ((626631.035 4832664.913, 626645.740 4...",...,-0.002876,-0.000325,0.002472,0.000657,0.001550,0.004310,-0.162852,-0.021218,0.001325,-0.008371
2,5350311.04,0311.04,35,Ontario,535,35535,Toronto,B,0,"POLYGON ((621442.946 4845544.057, 621449.695 4...",...,-0.001751,-0.002702,0.004674,-0.001764,0.006444,0.006216,-0.049210,0.256838,-0.008062,-0.028897
3,5350805.15,0805.15,35,Ontario,535,35535,Toronto,B,0,"POLYGON ((659801.892 4858742.915, 659729.106 4...",...,0.000372,0.001660,-0.000280,-0.001028,0.002775,-0.019649,-0.876496,0.221926,-0.020459,0.360652
4,5350007.02,0007.02,35,Ontario,535,35535,Toronto,B,1,"POLYGON ((626990.038 4832439.160, 626858.790 4...",...,-0.001335,-0.001387,0.003905,0.002207,0.001621,0.004310,-0.156809,-0.011974,0.001059,-0.040109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1128,5350312.05,0312.05,35,Ontario,535,35535,Toronto,B,0,"POLYGON ((619695.991 4844898.306, 619554.030 4...",...,-0.000779,0.001657,0.003128,0.005325,0.009468,-0.004743,-0.165751,0.032652,-0.001108,-0.031914
1129,5350013.02,0013.02,35,Ontario,535,35535,Toronto,B,1,"POLYGON ((631601.000 4833437.998, 631188.000 4...",...,-0.000148,0.002014,0.001796,0.000457,0.000743,0.005234,-0.577944,-0.446367,-0.004702,0.170097
1130,5350376.06,0376.06,35,Ontario,535,35535,Toronto,B,0,"POLYGON ((637097.553 4851235.109, 637025.630 4...",...,0.000000,0.007563,-0.020022,0.000000,0.011413,0.024039,0.154008,1.031571,-0.013514,0.022657
1131,5350376.04,0376.04,35,Ontario,535,35535,Toronto,B,0,"POLYGON ((634786.196 4852482.303, 634822.344 4...",...,0.000211,0.001322,0.002977,0.000764,0.002329,0.000697,0.115966,0.197362,-0.014846,0.001800


In [5]:
# Compute the spatial weights matrix from the geometries of the merged `ct`
# frame, i.e. the same frame the dependent and explanatory variables are
# taken from. W is passed to both the OLS diagnostics and the spatial lag model.
W = ps.lib.weights.Queen.from_dataframe(ct)
W.transform = 'r' # row normalizing: transform code 'r' row-standardises the weights



Regression with activities per day as the dependent variable. The code is then re-run with commute time as the dependent variable. Both are estimated with OLS and with a spatial lag model.

In [6]:
# Column used as the dependent variable. To re-run the models on commute time,
# comment out the first assignment and enable the second.
yvar = "R_activities_per_day_beta"
# yvar = "mean_commute_time_adult_beta"

# Copy the column out of `ct` and shape it as an (n, 1) column vector.
hoval = ct[yvar]
y = np.array(hoval).reshape(len(hoval), 1)

In [7]:
# Explanatory variables, one per row here and transposed so that each ends up
# as a column of X (one row per observation). The last two are multiplied by
# 100 -- presumably to express proportions as percentage points; TODO confirm.
X = np.array(
    [
        ct["pop_beta_1000s"],
        ct["SESi_beta"],
        ct["Ai_combine_beta"],
        ct["dw_no_car_beta"] * 100,
        ct["pop_elderly_prop_beta"] * 100,
    ]
).T

In [9]:
# Ordinary least squares fit of y on X. W is supplied so that the spatial
# diagnostics (spat_diag, moran) can be computed on the residuals; white_test
# additionally requests White's heteroskedasticity test.
# name_x labels the columns of X in the order they were stacked.
# name_w labels the weights matrix in the printed summary, which otherwise
# reports "Weights matrix: unknown"; "w" matches the label used for the
# spatial lag model.
ols = ps.model.spreg.OLS(
    y,
    X,
    W,
    spat_diag=True,
    moran=True,
    white_test=True,
    name_y=yvar,
    name_x=[
        'pop_beta_1000s',
        'SESi_beta',
        'Ai_combine_beta',
        'dw_no_car_beta',
        'pop_elderly_prop_beta',
    ],
    name_w="w",
    name_ds='betas',
)

In [10]:
# Print the text report of the OLS fit.
print(ols.summary)

REGRESSION
----------
SUMMARY OF OUTPUT: ORDINARY LEAST SQUARES
-----------------------------------------
Data set            :       betas
Weights matrix      :     unknown
Dependent Variable  :R_activities_per_day_beta                Number of Observations:        1133
Mean dependent var  :     -0.0080                Number of Variables   :           6
S.D. dependent var  :      0.0100                Degrees of Freedom    :        1127
R-squared           :      0.1740
Adjusted R-squared  :      0.1703
Sum squared residual:       0.093                F-statistic           :     47.4721
Sigma-square        :       0.000                Prob(F-statistic)     :    1.25e-44
S.E. of regression  :       0.009                Log likelihood        :    3721.991
Sigma-square ML     :       0.000                Akaike info criterion :   -7431.981
S.E of regression ML:      0.0091                Schwarz criterion     :   -7401.785

----------------------------------------------------------------

In [11]:
# Labels for the columns of X, in the order they were stacked. Defined here
# because no earlier cell defines `varX`; without it this cell raises
# NameError when the notebook is run top to bottom on a fresh kernel.
varX = [
    'pop_beta_1000s',
    'SESi_beta',
    'Ai_combine_beta',
    'dw_no_car_beta',
    'pop_elderly_prop_beta',
]

# Maximum-likelihood spatial lag model on the same y, X and W as the OLS fit.
slag = ps.model.spreg.ML_Lag(
    y,
    X,
    W,
    method='full',
    epsilon=1e-07,
    spat_diag=True,
    vm=False,
    name_y=yvar,
    name_x=varX,
    name_w="w",
    name_ds="betas",
)

  warn("Method 'bounded' does not support relative tolerance in x; "
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


In [12]:
# Show the estimated spatial autoregressive coefficient (rho) on its own line,
# followed by the full text report of the spatial lag fit.
print(slag.rho, slag.summary, sep="\n")

0.39959520221362366
REGRESSION
----------
SUMMARY OF OUTPUT: MAXIMUM LIKELIHOOD SPATIAL LAG (METHOD = FULL)
-----------------------------------------------------------------
Data set            :       betas
Weights matrix      :           w
Dependent Variable  :R_activities_per_day_beta                Number of Observations:        1133
Mean dependent var  :     -0.0080                Number of Variables   :           7
S.D. dependent var  :      0.0100                Degrees of Freedom    :        1126
Pseudo R-squared    :      0.2622
Spatial Pseudo R-squared:  0.2147
Sigma-square ML     :       0.000                Log likelihood        :    3768.634
S.E of regression   :       0.009                Akaike info criterion :   -7523.267
                                                 Schwarz criterion     :   -7488.039

------------------------------------------------------------------------------------
            Variable     Coefficient       Std.Error     z-Statistic     Probabil