In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import linearmodels as lm
import matplotlib
import math
import statsmodels.formula.api as smf

from linearmodels.panel import PanelOLS
from linearmodels.panel import RandomEffects
from linearmodels.panel import FirstDifferenceOLS
from linearmodels.panel import compare
from matplotlib import pyplot as plt

from pandas.api.types import is_numeric_dtype

# let wide frames display up to 500 columns instead of being truncated
pd.options.display.max_columns = 500

%matplotlib inline

In [25]:
# Load the main data set (UTF-8 encoded CSV, path relative to the working directory).
# NOTE(review): the "Unnamed: N" columns visible in the previews further down suggest the
# file carries a saved index column and a few empty columns — confirm against the CSV.
df = pd.read_csv(filepath_or_buffer='final_dataset.csv', encoding='utf-8')


In [26]:
# Split the rows by the value of the RailBus column:
# one frame each for Bus, Rail, DemandResponsive and Other.
bus = df.loc[df['RailBus'].eq('Bus')]
rail = df.loc[df['RailBus'].eq('Rail')]
demand_responsive = df.loc[df['RailBus'].eq('DemandResponsive')]
other = df.loc[df['RailBus'].eq('Other')]


In [27]:
# Index both frames by (MNAME, Year); the panel estimators used later read the
# entity and time dimensions from this two-level index.
bus, rail = (frame.set_index(['MNAME', 'Year']) for frame in (bus, rail))

In [28]:
# Drop every non-numeric column from both frames -- the estimation errors out otherwise.
bus, rail = (frame.select_dtypes(include=[np.number]) for frame in (bus, rail))

In [29]:
# create a log of all fields
def add_log_columns(frame, suffix='_log'):
    """Return ``frame`` extended with a ``log(1 + x)`` column for every field.

    Each new column is named ``<field><suffix>`` and is placed after the
    original fields, in the same order as the fields themselves.

    Columns whose name already ends in ``suffix`` are treated as the output of
    an earlier run and are rebuilt instead of being logged again, so re-running
    the cell does not accumulate ``*_log_log`` columns.
    NOTE(review): this assumes no raw field name ends in ``suffix`` -- true for
    the columns shown in the previews of this notebook; confirm if the data set
    changes.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame holding only numeric columns.
    suffix : str, default '_log'
        Text appended to a field name to form the name of its logged column.

    Returns
    -------
    pandas.DataFrame
        A new frame: the source fields followed by their logged companions.
        The input frame is not modified.
    """
    source_cols = [col for col in frame.columns if not str(col).endswith(suffix)]
    source = frame[source_cols]
    # log1p(x) equals log(x + 1) but keeps precision when x is close to zero;
    # transforming the whole frame in one call also avoids inserting the new
    # columns one at a time.
    logged = np.log1p(source).add_suffix(suffix)
    return pd.concat([source, logged], axis=1)


bus = add_log_columns(bus)
rail = add_log_columns(rail)

In [31]:
# preview the first five bus rows (raw fields followed by their *_log columns)
bus.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,Id2,Cluster_upt,upt,VRM,Total_HH,HH_0Veh,HH_1Veh,HH_2Veh,HH_3Veh,HH_4+Veh,CBSA Code,Pop_TSD,Tot_Emp_TSD,Tot_Retail_TSD,gasPrice,HH_MED_INC,HH_MEAN_INC,INC_U35,INC_35_100,INC_100P,Tot_Pop,Tot_Instate_Pop,Tot_Outstate_Pop,Tot_NonUSA_POP,Total_Median_Income_Individual,Native_Instate_Med_Inc_Indiv,Native_Outstate_Med_Inc_Indiv,Total_Pop_Poverty,Pop_Below100_Poverty,Pop_Below150_Poverty,Pop_Above150_Poverty,Age_under18,Age_18to64,Age_over64,Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Total_Labor_MSA,Unemployment_Labor_MSA,Employment_Labor:MSA,Unemployment_Rate_MSA,Unnamed: 0_log,Id2_log,Cluster_upt_log,upt_log,VRM_log,Total_HH_log,HH_0Veh_log,HH_1Veh_log,HH_2Veh_log,HH_3Veh_log,HH_4+Veh_log,CBSA Code_log,Pop_TSD_log,Tot_Emp_TSD_log,Tot_Retail_TSD_log,gasPrice_log,HH_MED_INC_log,HH_MEAN_INC_log,INC_U35_log,INC_35_100_log,INC_100P_log,Tot_Pop_log,Tot_Instate_Pop_log,Tot_Outstate_Pop_log,Tot_NonUSA_POP_log,Total_Median_Income_Individual_log,Native_Instate_Med_Inc_Indiv_log,Native_Outstate_Med_Inc_Indiv_log,Total_Pop_Poverty_log,Pop_Below100_Poverty_log,Pop_Below150_Poverty_log,Pop_Above150_Poverty_log,Age_under18_log,Age_18to64_log,Age_over64_log,Unnamed: 44_log,Unnamed: 45_log,Unnamed: 46_log,Unnamed: 47_log,Total_Labor_MSA_log,Unemployment_Labor_MSA_log,Employment_Labor:MSA_log,Unemployment_Rate_MSA_log
MNAME,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1
"Akron, OH Metro Area",2005,0,10420,1,1294595.792,572693.6458,276416,20661,85198,112380,40847,17330,10420.0,100521.5,41364.0,4110.0,2.246667,44719,58676,38.7,46.8,14.5,684459,521491,138184,2945,24612,24899,24755,683742,11.3,8.1,80.7,23.7,63.2,13.1,,,,,,,,,0.0,9.251578,0.693147,14.07371,13.258108,12.529666,9.936052,11.352745,11.62965,10.617613,9.760252,9.251578,11.518137,10.63019,8.321422,1.177629,10.708176,10.979803,3.681351,3.867026,2.74084,13.436385,13.164449,11.836349,7.988204,10.11103,10.122623,10.116823,13.435337,2.509599,2.208274,4.403054,3.206803,4.162003,2.646175,,,,,,,,
"Akron, OH Metro Area",2006,3,10420,1,1363776.042,536282.7292,280837,19248,97567,109326,38795,15901,10420.0,99593.0,43378.0,3771.0,2.54425,44507,58859,38.8,47.2,14.0,700943,530777,139714,4695,23925,24129,23644,684898,12.7,7.8,79.5,23.3,63.3,13.4,,,,,,,,,1.386294,9.251578,0.693147,14.125769,13.192419,12.545533,9.865214,11.488305,11.602099,10.566072,9.6742,9.251578,11.508857,10.677731,8.235361,1.265327,10.703424,10.982917,3.683867,3.875359,2.70805,13.460183,13.182099,11.84736,8.454466,10.082721,10.091211,10.070907,13.437027,2.617396,2.174752,4.388257,3.190476,4.16356,2.667228,,,,,,,,
"Akron, OH Metro Area",2007,6,10420,1,1237511.271,619889.9375,281669,21066,94071,113232,36917,16383,10420.0,98664.5,41173.0,3626.0,2.821583,47898,63454,37.2,46.1,16.7,699356,536148,135625,3455,25217,25202,24776,682807,13.4,7.6,79.0,23.3,63.4,13.3,,,,,,,,,1.94591,9.251578,0.693147,14.028614,13.337299,12.548491,9.955463,11.451816,11.637203,10.516455,9.704061,9.251578,11.499491,10.625562,8.196161,1.340665,10.77685,11.058086,3.642836,3.852273,2.873565,13.457917,13.192167,11.817656,8.147867,10.135313,10.134718,10.117671,13.433969,2.667228,2.151762,4.382027,3.190476,4.165114,2.66026,,,,,,,,
"Akron, OH Metro Area",2008,9,10420,1,1246293.688,565919.2292,281731,19035,100229,108332,38548,15587,10420.0,97736.0,41542.0,3830.0,3.2295,50036,65801,36.4,45.1,18.5,698553,529519,137011,4452,25357,25559,25281,681880,12.1,8.2,79.7,22.9,63.5,13.6,,,,,,,,,2.302585,9.251578,0.693147,14.035685,13.246208,12.548712,9.854087,11.515223,11.592965,10.559685,9.654257,9.251578,11.490035,10.634484,8.250881,1.442084,10.820518,11.094406,3.621671,3.830813,2.970414,13.456768,13.179726,11.827824,8.401333,10.14085,10.148784,10.137848,13.43261,2.572612,2.219203,4.390739,3.173878,4.166665,2.681022,,,,,,,,
"Akron, OH Metro Area",2009,12,10420,1,1000725.896,522729.1458,281769,23447,97811,108215,36339,15957,10420.0,96807.5,40430.0,3743.0,2.33225,47482,60668,36.8,48.1,15.1,699935,537803,134499,2466,24856,24810,24817,685405,14.7,7.2,78.1,22.4,63.6,14.0,,,,,,,,,2.564949,9.251578,0.693147,13.816237,13.166821,12.548846,10.06254,11.490803,11.591885,10.500674,9.677716,9.251578,11.48049,10.607352,8.22791,1.203648,10.768127,11.013188,3.632309,3.893859,2.778819,13.458744,13.195249,11.809319,7.810758,10.120895,10.119042,10.119324,13.437767,2.753661,2.104134,4.370713,3.152736,4.168214,2.70805,,,,,,,,


In [32]:
# preview the first five rail rows (raw fields followed by their *_log columns)
rail.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,Id2,Cluster_upt,upt,VRM,Total_HH,HH_0Veh,HH_1Veh,HH_2Veh,HH_3Veh,HH_4+Veh,CBSA Code,Pop_TSD,Tot_Emp_TSD,Tot_Retail_TSD,gasPrice,HH_MED_INC,HH_MEAN_INC,INC_U35,INC_35_100,INC_100P,Tot_Pop,Tot_Instate_Pop,Tot_Outstate_Pop,Tot_NonUSA_POP,Total_Median_Income_Individual,Native_Instate_Med_Inc_Indiv,Native_Outstate_Med_Inc_Indiv,Total_Pop_Poverty,Pop_Below100_Poverty,Pop_Below150_Poverty,Pop_Above150_Poverty,Age_under18,Age_18to64,Age_over64,Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Total_Labor_MSA,Unemployment_Labor_MSA,Employment_Labor:MSA,Unemployment_Rate_MSA,Unnamed: 0_log,Id2_log,Cluster_upt_log,upt_log,VRM_log,Total_HH_log,HH_0Veh_log,HH_1Veh_log,HH_2Veh_log,HH_3Veh_log,HH_4+Veh_log,CBSA Code_log,Pop_TSD_log,Tot_Emp_TSD_log,Tot_Retail_TSD_log,gasPrice_log,HH_MED_INC_log,HH_MEAN_INC_log,INC_U35_log,INC_35_100_log,INC_100P_log,Tot_Pop_log,Tot_Instate_Pop_log,Tot_Outstate_Pop_log,Tot_NonUSA_POP_log,Total_Median_Income_Individual_log,Native_Instate_Med_Inc_Indiv_log,Native_Outstate_Med_Inc_Indiv_log,Total_Pop_Poverty_log,Pop_Below100_Poverty_log,Pop_Below150_Poverty_log,Pop_Above150_Poverty_log,Age_under18_log,Age_18to64_log,Age_over64_log,Unnamed: 44_log,Unnamed: 45_log,Unnamed: 46_log,Unnamed: 47_log,Total_Labor_MSA_log,Unemployment_Labor_MSA_log,Employment_Labor:MSA_log,Unemployment_Rate_MSA_log
MNAME,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1
"Akron, OH Metro Area",2005,2,10420,1,867829.7,211674.3333,276416,20661,85198,112380,40847,17330,10420.0,100521.5,41364.0,4110.0,2.246667,44719,58676,38.7,46.8,14.5,684459,521491,138184,2945,24612,24899,24755,683742,11.3,8.1,80.7,23.7,63.2,13.1,,,,,,,,,1.098612,9.251578,0.693147,13.673752,12.262809,12.529666,9.936052,11.352745,11.62965,10.617613,9.760252,9.251578,11.518137,10.63019,8.321422,1.177629,10.708176,10.979803,3.681351,3.867026,2.74084,13.436385,13.164449,11.836349,7.988204,10.11103,10.122623,10.116823,13.435337,2.509599,2.208274,4.403054,3.206803,4.162003,2.646175,,,,,,,,
"Akron, OH Metro Area",2006,5,10420,1,911439.7,219053.25,280837,19248,97567,109326,38795,15901,10420.0,99593.0,43378.0,3771.0,2.54425,44507,58859,38.8,47.2,14.0,700943,530777,139714,4695,23925,24129,23644,684898,12.7,7.8,79.5,23.3,63.3,13.4,,,,,,,,,1.791759,9.251578,0.693147,13.722782,12.297075,12.545533,9.865214,11.488305,11.602099,10.566072,9.6742,9.251578,11.508857,10.677731,8.235361,1.265327,10.703424,10.982917,3.683867,3.875359,2.70805,13.460183,13.182099,11.84736,8.454466,10.082721,10.091211,10.070907,13.437027,2.617396,2.174752,4.388257,3.190476,4.16356,2.667228,,,,,,,,
"Akron, OH Metro Area",2007,8,10420,1,956162.6,209295.1667,281669,21066,94071,113232,36917,16383,10420.0,98664.5,41173.0,3626.0,2.821583,47898,63454,37.2,46.1,16.7,699356,536148,135625,3455,25217,25202,24776,682807,13.4,7.6,79.0,23.3,63.4,13.3,,,,,,,,,2.197225,9.251578,0.693147,13.770684,12.251506,12.548491,9.955463,11.451816,11.637203,10.516455,9.704061,9.251578,11.499491,10.625562,8.196161,1.340665,10.77685,11.058086,3.642836,3.852273,2.873565,13.457917,13.192167,11.817656,8.147867,10.135313,10.134718,10.117671,13.433969,2.667228,2.151762,4.382027,3.190476,4.165114,2.66026,,,,,,,,
"Akron, OH Metro Area",2008,11,10420,1,1002934.0,203862.4167,281731,19035,100229,108332,38548,15587,10420.0,97736.0,41542.0,3830.0,3.2295,50036,65801,36.4,45.1,18.5,698553,529519,137011,4452,25357,25559,25281,681880,12.1,8.2,79.7,22.9,63.5,13.6,,,,,,,,,2.484907,9.251578,0.693147,13.818441,12.225206,12.548712,9.854087,11.515223,11.592965,10.559685,9.654257,9.251578,11.490035,10.634484,8.250881,1.442084,10.820518,11.094406,3.621671,3.830813,2.970414,13.456768,13.179726,11.827824,8.401333,10.14085,10.148784,10.137848,13.43261,2.572612,2.219203,4.390739,3.173878,4.166665,2.681022,,,,,,,,
"Akron, OH Metro Area",2009,14,10420,1,686152.4,191086.0,281769,23447,97811,108215,36339,15957,10420.0,96807.5,40430.0,3743.0,2.33225,47482,60668,36.8,48.1,15.1,699935,537803,134499,2466,24856,24810,24817,685405,14.7,7.2,78.1,22.4,63.6,14.0,,,,,,,,,2.70805,9.251578,0.693147,13.438857,12.160484,12.548846,10.06254,11.490803,11.591885,10.500674,9.677716,9.251578,11.48049,10.607352,8.22791,1.203648,10.768127,11.013188,3.632309,3.893859,2.778819,13.458744,13.195249,11.809319,7.810758,10.120895,10.119042,10.119324,13.437767,2.753661,2.104134,4.370713,3.152736,4.168214,2.70805,,,,,,,,


In [40]:
# basic log model for bus, all clusters
#
# Regresses upt_log on VRM_log, Tot_Pop_log, Total_Labor_MSA_log and gasPrice_log,
# where every *_log field is log(x + 1) of the raw column. The EntityEffects term
# adds entity fixed effects; entity and time come from the (MNAME, Year) index of `bus`.
# The backslashes sit inside the string literal, so the formula is one logical line and
# the indentation of each continuation line ends up as extra spaces in the string.
mod=PanelOLS.from_formula('upt_log \
                    ~ VRM_log \
                    + Tot_Pop_log \
                    + Total_Labor_MSA_log \
                    + gasPrice_log \
                    + EntityEffects \
                    ',data=bus)
# fit() is called with its defaults; the recorded summary reports the covariance
# estimator as "Unadjusted".
# NOTE(review): standard errors clustered by entity are commonly preferred for panel
# data -- confirm that the unadjusted estimator is intended.
# NOTE(review): the recorded run warned that rows with missing values were dropped and
# reports 1249 observations over 116 entities, so the estimation sample is smaller
# than the full bus panel.
res=mod.fit()
print(res)

                          PanelOLS Estimation Summary                           
Dep. Variable:                upt_log   R-squared:                        0.9661
Estimator:                   PanelOLS   R-squared (Between):              0.8583
No. Observations:                1249   R-squared (Within):               0.9661
Date:                Mon, Sep 09 2019   R-squared (Overall):              0.8599
Time:                        13:49:01   Log-likelihood                   -138.53
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      8035.0
Entities:                         116   P-value                           0.0000
Avg Obs:                       10.767   Distribution:                  F(4,1129)
Min Obs:                       6.0000                                           
Max Obs:                       11.000   F-statistic (robust):             8035.0
                            

Inputs contain missing values. Dropping rows with missing observations.


In [41]:
# basic log model for bus, all clusters
# use pop and emp in transit supportive density (TSD)

# Regresses upt_log on VRM_log, Pop_TSD_log, Tot_Emp_TSD_log and gasPrice_log,
# where every *_log field is log(x + 1) of the raw column. The EntityEffects term
# adds entity fixed effects; entity and time come from the (MNAME, Year) index of `bus`.
# The backslashes sit inside the string literal, so the formula is one logical line and
# the indentation of each continuation line ends up as extra spaces in the string.
mod=PanelOLS.from_formula('upt_log \
                    ~ VRM_log \
                    + Pop_TSD_log \
                    + Tot_Emp_TSD_log \
                    + gasPrice_log \
                    + EntityEffects \
                    ',data=bus)
# fit() is called with its defaults; the recorded summary reports the covariance
# estimator as "Unadjusted".
# NOTE(review): standard errors clustered by entity are commonly preferred for panel
# data -- confirm that the unadjusted estimator is intended.
# NOTE(review): the recorded run warned that rows with missing values were dropped and
# reports 1684 observations over 156 entities, whereas the Tot_Pop / Total_Labor_MSA
# specification in this notebook reports 1249 over 116 -- the two fits do not use the
# same sample, so compare them with care.
res=mod.fit()
print(res)

                          PanelOLS Estimation Summary                           
Dep. Variable:                upt_log   R-squared:                        0.9660
Estimator:                   PanelOLS   R-squared (Between):              0.9936
No. Observations:                1684   R-squared (Within):               0.9660
Date:                Mon, Sep 09 2019   R-squared (Overall):              0.9934
Time:                        13:53:12   Log-likelihood                    17.426
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                   1.082e+04
Entities:                         156   P-value                           0.0000
Avg Obs:                       10.795   Distribution:                  F(4,1524)
Min Obs:                       6.0000                                           
Max Obs:                       11.000   F-statistic (robust):          1.082e+04
                            

Inputs contain missing values. Dropping rows with missing observations.
