### Transit ridership panel data models

This notebook estimates panel models of MUNI bus and rail ridership at the TAZ (traffic analysis zone) level, in order to determine the effect of TNCs (transportation network companies) on transit ridership in San Francisco.

gde 3.28.2020

In [2]:
import pandas as pd
import numpy as np

from linearmodels.panel import PanelOLS

# Display settings: show up to 500 columns and up to 20 rows when a frame is rendered.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 20)

%matplotlib inline

In [3]:
# Load the panel estimation dataset into `data`.
# thousands=',' makes pandas treat ',' as a thousands separator in numeric values.
panel_csv_path = 'data/PanelEstimationFile.csv'
data = pd.read_csv(panel_csv_path, thousands=',')


In [4]:
# Calculate derived fields.
#
# All new columns are added in one `assign` call. Keyword order determines the
# order in which the columns are appended, and the lambdas are evaluated against
# the frame as built so far, so they can read the BUS / RAIL flags created
# earlier in the same call.
data = data.assign(
    # Indicator flags: 1 when the row matches, 0 otherwise.
    BUS=np.where(data['MODE'] == 'BUS', 1, 0),
    RAIL=np.where(data['MODE'] == 'RAIL', 1, 0),
    YEAR_2015=np.where(data['YEAR'] == 2015, 1, 0),

    # Mode-specific copies of the service and TNC variables, so bus and rail
    # can take separate coefficients. Multiplying by the 0/1 flag zeroes the
    # value on rows of the other mode.
    TRIP_STOPS_DELIVERED_BUS=lambda df: df['TRIP_STOPS_DELIVERED'] * df['BUS'],
    TRIP_STOPS_DELIVERED_RAIL=lambda df: df['TRIP_STOPS_DELIVERED'] * df['RAIL'],
    AVG_TNC_SMOOTH_BUS=lambda df: df['AVG_TNC_SMOOTH'] * df['BUS'],
    AVG_TNC_SMOOTH_RAIL=lambda df: df['AVG_TNC_SMOOTH'] * df['RAIL'],
    BUS_TRIP_STOPS_COMPETING_BUS=lambda df: df['BUS_TRIP_STOPS_COMPETING'] * df['BUS'],

    # BART and Caltrain ridership combined into a single variable.
    BART_CT_AVG_RIDE_SMOOTH=lambda df: df['BART_AVG_RIDE_SMOOTH'] + df['CALTRAIN_AVG_RIDE_SMOOTH'],
)


In [5]:
# Set the (ID, YEAR) MultiIndex that the panel model is estimated on.
#
# Guarded so the cell is safe to re-run: after the first run ID and YEAR live in
# the index and are no longer columns, so calling set_index again would raise
# a KeyError.
if list(data.index.names) != ['ID', 'YEAR']:
    data = data.set_index(['ID', 'YEAR'])
data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,SFTAZ,MODE,TOD2,DIST,DISTNAME,AVG_RIDE,ROUTES,STOPS,TRIP_STOPS,TRIP_STOPS_DELIVERED,RAPID_SHARE,SERVMILES_S_DELIVERED,RUNSPEED,ONTIME5,BUS_TRIP_STOPS_COMPETING,HHLDS,POP,TOTALEMP,EMPRES,ACCESS_30,HHLDS_0_VEH_SHARE,BART_AVG_RIDE,CALTRAIN_AVG_RIDE,AVG_TNC,HHLDS_SMOOTH,POP_SMOOTH,TOTALEMP_SMOOTH,EMPRES_SMOOTH,HHLDS_0_VEH_SHARE_SMOOTH,BART_AVG_RIDE_SMOOTH,CALTRAIN_AVG_RIDE_SMOOTH,AVG_TNC_SMOOTH,BUS,RAIL,YEAR_2015,TRIP_STOPS_DELIVERED_BUS,TRIP_STOPS_DELIVERED_RAIL,AVG_TNC_SMOOTH_BUS,AVG_TNC_SMOOTH_RAIL,BUS_TRIP_STOPS_COMPETING_BUS,BART_CT_AVG_RIDE_SMOOTH
ID,YEAR,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1
1_BUS_0300-0859,2010,0,1,BUS,0300-0859,9,'Bayshore',11.503687,2.0,3.0,36.0,34.884,0.0,3.162816,12.26,0.59453,14.992544,306,1303,50,506,302980.0,0.227414,0.0,0.0,0.0,71.422712,297.2428,12.22362,115.311125,0.241549,0.0,0.0,0.0,1,0,0,34.884,0.0,0.0,0.0,14.992544,0.0
1_BUS_0300-0859,2015,1,1,BUS,0300-0859,9,'Bayshore',33.240093,3.0,3.0,50.0,49.8,0.8,4.519848,5.47,0.448981,18.880575,317,1353,69,546,351207.0,0.169343,0.0,0.0,3.395,74.882905,311.916768,17.408276,125.629692,0.197806,0.0,0.0,0.659106,1,0,1,49.8,0.0,0.659106,0.0,18.880575,0.0
1_BUS_0900-1559,2010,2,1,BUS,0900-1559,9,'Bayshore',50.245435,2.0,3.0,84.0,81.396,0.0,7.366338,12.81,0.572105,31.462386,306,1303,50,506,219577.0,0.227414,0.0,0.0,0.0,71.422712,297.2428,12.22362,115.311125,0.241549,0.0,0.0,0.0,1,0,0,81.396,0.0,0.0,0.0,31.462386,0.0
1_BUS_0900-1559,2015,3,1,BUS,0900-1559,9,'Bayshore',56.483018,2.0,3.0,119.0,118.524,0.890756,10.74684,5.76,0.505889,41.739716,317,1353,69,546,189411.0,0.169343,0.0,0.0,3.725,74.882905,311.916768,17.408276,125.629692,0.197806,0.0,0.0,0.855811,1,0,1,118.524,0.0,0.855811,0.0,41.739716,0.0
1_BUS_1600-1859,2010,4,1,BUS,1600-1859,9,'Bayshore',31.448123,2.0,3.0,36.0,34.884,0.0,3.157002,11.63,0.408982,13.744592,306,1303,50,506,124505.0,0.227414,0.0,0.0,0.0,71.422712,297.2428,12.22362,115.311125,0.241549,0.0,0.0,0.0,1,0,0,34.884,0.0,0.0,0.0,13.744592,0.0


In [6]:
# Combined TAZ model, specified to match Greg's model.
#
# The formula is one string literal; the trailing backslashes continue the
# literal across source lines. Most terms enter as log(1 + x). The *_BUS and
# *_RAIL terms are the mode-segmented columns, and EntityEffects requests
# entity effects from linearmodels.
ridership_formula = "np.log(1+AVG_RIDE) \
                            ~ np.log(1+ACCESS_30) \
                            + np.log(1+ROUTES) \
                            + np.log(1+TRIP_STOPS_DELIVERED_BUS) \
                            + np.log(1+TRIP_STOPS_DELIVERED_RAIL) \
                            + ONTIME5 \
                            + np.log(1+BART_CT_AVG_RIDE_SMOOTH) \
                            + np.log(1+BUS_TRIP_STOPS_COMPETING_BUS) \
                            + np.log(1+AVG_TNC_SMOOTH_BUS) \
                            + np.log(1+AVG_TNC_SMOOTH_RAIL) \
                            + YEAR_2015 \
                            + EntityEffects"

mod = PanelOLS.from_formula(ridership_formula, data=data)

# NOTE(review): fit() uses its default covariance estimator (the summary reports
# it as "Unadjusted"). Confirm whether entity-clustered standard errors are
# wanted before changing this, since the cell is meant to reproduce an
# existing model.
res = mod.fit()
print(res)

                           PanelOLS Estimation Summary                            
Dep. Variable:     np.log(1 + AVG_RIDE)   R-squared:                        0.3234
Estimator:                     PanelOLS   R-squared (Between):              0.8894
No. Observations:                  7358   R-squared (Within):               0.3234
Date:                  Thu, Apr 23 2020   R-squared (Overall):              0.8879
Time:                          11:17:02   Log-likelihood                    1897.1
Cov. Estimator:              Unadjusted                                           
                                          F-statistic:                      175.41
Entities:                          3679   P-value                           0.0000
Avg Obs:                         2.0000   Distribution:                 F(10,3669)
Min Obs:                         2.0000                                           
Max Obs:                         2.0000   F-statistic (robust):             175.41
    