In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import linearmodels as lm
import matplotlib
import math
import statsmodels.formula.api as smf

from linearmodels.panel import PanelOLS
from linearmodels.panel import RandomEffects
from linearmodels.panel import FirstDifferenceOLS
from linearmodels.panel import compare
from matplotlib import pyplot as plt

from pandas.api.types import is_numeric_dtype

pd.set_option('display.max_columns', 500)

%matplotlib inline

In [2]:
# Load the main data set from the UTF-8 encoded CSV into `df`.
df = pd.read_csv('final_dataset_oct_24.csv', encoding='utf-8')


In [5]:
# Keep only rows with strictly positive ridership (UPT); zero ridership is
# treated as missing data, and rows where UPT is NaN fail the test as well.
df = df.loc[df['UPT'].gt(0)]

In [6]:
# Split the data by transit mode. Only the 'Bus' and 'Rail' rows are pulled
# out here; rows with any other Mode value remain available in `df`.
bus = df.loc[df['Mode'].eq('Bus')]
rail = df.loc[df['Mode'].eq('Rail')]


In [7]:
# Index both frames by (MNAME, Year): the first level identifies the panel
# entity and the second the time period used by the panel estimators below.
bus = bus.set_index(keys=['MNAME', 'Year'])
rail = rail.set_index(keys=['MNAME', 'Year'])

In [8]:
# Drop every non-numeric column from both frames; the estimation raises an
# error if they are left in.
bus, rail = (frame.select_dtypes(include='number') for frame in (bus, rail))

In [9]:
# Add a log(1 + x) transform of every numeric field as `<name>_log`.
def _with_log_columns(frame):
    """Return `frame` with a `<name>_log` = log(1 + x) column for each source column.

    A column counts as a source when its name does not end in '_log'. Names
    that already end in '_log' are never transformed again, so re-running the
    cell cannot create `<name>_log_log` columns. An existing column whose name
    collides with a freshly computed `<name>_log` is replaced by the new
    values; every other column is kept as-is, ahead of the log columns.

    Parameters
    ----------
    frame : pandas.DataFrame
        Numeric frame with string column labels.

    Returns
    -------
    pandas.DataFrame
        A new frame; `frame` itself is not modified.
    """
    sources = [name for name in frame.columns if not name.endswith('_log')]
    # np.log1p(x) is the numerically stable spelling of np.log(x + 1).
    logs = np.log1p(frame[sources]).add_suffix('_log')
    # Drop stale copies of the columns about to be re-added, then attach all
    # transforms in one concat instead of one column insertion at a time.
    fresh = set(logs.columns)
    kept = frame[[name for name in frame.columns if name not in fresh]]
    return pd.concat([kept, logs], axis=1)


# NOTE(review): every numeric column is transformed, including identifier-like
# fields such as CBSA and TNC_ARRIVAL and the flag columns -- only reference
# the `_log` columns that are meaningful in a model formula.
bus = _with_log_columns(bus)
rail = _with_log_columns(rail)

  app.launch_new_instance()


In [10]:
# Sanity check: preview the first five rows of the bus panel.
bus.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,CBSA,NEW_CBSA,UPT,VRM,UPT_ADJ,VRM_ADJ,VRH_ADJ,FARE_TOTAL,FARE_per_UPT,AREALANDPT,Tot_Pop,Tot_Instate_Pop,Tot_Outstate_Pop,Tot_NonUSA_POP,Total_Median_Income_Individual,Native_Instate_Med_Inc_Indiv,Native_Outstate_Med_Inc_Indiv,Total_Pop_Poverty,Pop_Below100_Poverty,Pop_Below150_Poverty,Pop_Above150_Poverty,Age_under18,Age_18to64,Age_over64,Total_HH,HH_0Veh,HH_1Veh,HH_2Veh,HH_3Veh,HH_4+Veh,PCT_HH_NO_VEH,HH_MED_INC,HH_MEAN_INC,INC_U35,INC_35_100,INC_100P,TOT_UNEMP_MSA,TOT_EMP_MSA,TOT_LABOR_MSA,UNEMP_RATE_PCT,EMP_RATE_PCT,GasPrice,Area_acre,AVG_SPEED,2018_Dollar_Multiplier,TNC_ARRIVAL,TNC_FLAG,YEARS_SINCE_TNC,PROBLEM_FLAG,CBSA_log,NEW_CBSA_log,UPT_log,VRM_log,UPT_ADJ_log,VRM_ADJ_log,VRH_ADJ_log,FARE_TOTAL_log,FARE_per_UPT_log,AREALANDPT_log,Tot_Pop_log,Tot_Instate_Pop_log,Tot_Outstate_Pop_log,Tot_NonUSA_POP_log,Total_Median_Income_Individual_log,Native_Instate_Med_Inc_Indiv_log,Native_Outstate_Med_Inc_Indiv_log,Total_Pop_Poverty_log,Pop_Below100_Poverty_log,Pop_Below150_Poverty_log,Pop_Above150_Poverty_log,Age_under18_log,Age_18to64_log,Age_over64_log,Total_HH_log,HH_0Veh_log,HH_1Veh_log,HH_2Veh_log,HH_3Veh_log,HH_4+Veh_log,PCT_HH_NO_VEH_log,HH_MED_INC_log,HH_MEAN_INC_log,INC_U35_log,INC_35_100_log,INC_100P_log,TOT_UNEMP_MSA_log,TOT_EMP_MSA_log,TOT_LABOR_MSA_log,UNEMP_RATE_PCT_log,EMP_RATE_PCT_log,GasPrice_log,Area_acre_log,AVG_SPEED_log,2018_Dollar_Multiplier_log,TNC_ARRIVAL_log,TNC_FLAG_log,YEARS_SINCE_TNC_log,PROBLEM_FLAG_log
MNAME,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1
"Abilene, TX Metro Area",2002,10180,,493073,389678,493073.0,389678.0,29016.0,162609.0,0.329787,141756054,117258.2188,87723.96875,23752.17188,2987.484375,19756.67188,22848.10938,17091.51563,147631.94,22.3,12.89,64.82,31.23,54.9,13.87,61601.69,1195.73,27303.56,21926.16,8121.36,3054.88,1.94,25775.875,30134.875,61.74,34.07,4.19,3945.33,73760.33,77705.67,5.08,94.92,1.32,,13.43,1.4,2016,0,0,,9.228279,,13.108415,12.873079,13.108415,12.873079,10.275637,11.99911,0.285019,18.769618,11.672142,11.381962,10.075471,8.002522,9.891297,10.036667,9.746396,11.902484,3.148453,2.631169,4.186924,3.472898,4.023564,2.699346,11.028461,7.087348,10.214809,9.995481,9.002376,8.024823,1.07841,10.157233,10.313472,4.138999,3.557346,1.646734,8.280541,11.20859,11.260696,1.805005,4.563515,0.841567,,2.669309,0.875469,7.609367,0.0,0.0,
"Abilene, TX Metro Area",2003,10180,,475747,383239,475747.0,383239.0,28597.0,173563.0,0.364822,141756054,127098.125,93677.125,26964.3125,2834.0625,19941.3125,21972.5625,18472.9375,147198.25,20.72,12.86,66.43,29.78,56.41,13.81,61229.25,1515.06,25910.75,22846.38,8038.56,2918.5,2.47,29077.5,35373.5,57.54,36.92,5.54,4222.75,75377.08,79599.83,5.3,94.7,1.5,,13.4,1.37,2016,0,0,,9.228279,,13.072644,12.856417,13.072644,12.856417,10.261092,12.064302,0.311024,18.769618,11.752723,11.44762,10.202307,7.949819,9.900599,9.997595,9.824116,11.899542,3.078233,2.629007,4.21109,3.426865,4.050219,2.695303,11.022397,7.32387,10.162452,10.036592,8.99213,7.979168,1.244155,10.277754,10.473747,4.06971,3.635479,1.877937,8.348479,11.230272,11.28478,1.84055,4.561218,0.916291,,2.667228,0.86289,7.609367,0.0,0.0,
"Abilene, TX Metro Area",2004,10180,,469981,388907,469981.0,388907.0,29126.0,185046.0,0.393731,141756054,137113.5,99787.5,30227.75,2648.75,20132.75,21085.75,19808.25,146801.0,19.13,12.88,68.0,28.3,57.95,13.75,60833.0,1823.75,24514.0,23787.5,7942.75,2765.0,3.0,32452.0,40728.0,53.25,39.83,6.93,3712.17,75862.08,79574.25,4.67,95.33,1.79,,13.35,1.33,2016,0,0,,9.228279,,13.06045,12.871098,13.06045,12.871098,10.279421,12.128365,0.331984,18.769618,11.828572,11.510808,10.316549,7.882221,9.910153,9.9564,9.893904,11.89684,3.002211,2.630449,4.234107,3.377588,4.07669,2.691243,11.015904,7.509198,10.10704,10.076958,8.980141,7.925158,1.386294,10.387548,10.614696,3.993603,3.709417,2.070653,8.219641,11.236685,11.284458,1.735189,4.56778,1.026042,,2.66375,0.845868,7.609367,0.0,0.0,
"Abilene, TX Metro Area",2005,10180,,507165,444176,507165.0,444176.0,31147.0,213965.0,0.421884,141756054,146427.0,105269.0,33286.0,2591.0,20297.0,20244.0,21328.0,146258.0,17.6,12.7,69.7,26.9,59.4,13.7,60532.0,2175.0,23133.0,24645.0,7899.0,2680.0,3.59,35535.0,45619.0,49.3,42.5,8.2,3375.75,76704.5,80080.25,4.22,95.78,2.23,1216.0,14.26,1.29,2016,0,0,,9.228279,,13.136594,13.003978,13.136594,13.003978,10.346505,12.273572,0.351983,18.769618,11.894289,11.564284,10.412922,7.860185,9.918278,9.915663,9.967823,11.893134,2.923162,2.617396,4.258446,3.328627,4.100989,2.687847,11.010944,7.685244,10.049059,10.11237,8.974618,7.893945,1.52388,10.478302,10.728101,3.918005,3.772761,2.219203,8.124669,11.247729,11.290797,1.652497,4.57244,1.172482,7.104144,2.725235,0.828552,7.609367,0.0,0.0,
"Abilene, TX Metro Area",2006,10180,,493638,441300,493638.0,441300.0,30750.0,201795.0,0.408791,141756054,158548.0,113266.0,37165.0,2023.0,20570.0,19222.0,22110.0,146298.0,15.8,13.3,70.9,25.2,61.2,13.6,59850.0,2356.0,21689.0,25837.0,7647.0,2321.0,3.94,39784.0,52364.0,44.0,46.1,9.9,3242.08,78203.25,81445.33,3.98,96.02,2.52,1216.0,14.35,1.25,2016,0,0,,9.228279,,13.10956,12.997482,13.10956,12.997482,10.333678,12.215013,0.342732,18.769618,11.973819,11.637503,10.52315,7.612831,9.931638,9.863863,10.003831,11.893408,2.821379,2.66026,4.275276,3.265759,4.130355,2.681022,10.999613,7.765145,9.984607,10.159602,8.942199,7.750184,1.597365,10.591245,10.865994,3.806662,3.852273,2.388763,8.084279,11.267079,11.3077,1.60543,4.574917,1.258461,7.104144,2.731115,0.81093,7.609367,0.0,0.0,


In [11]:
# Sanity check: preview the first five rows of the rail panel.
rail.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,CBSA,NEW_CBSA,UPT,VRM,UPT_ADJ,VRM_ADJ,VRH_ADJ,FARE_TOTAL,FARE_per_UPT,AREALANDPT,Tot_Pop,Tot_Instate_Pop,Tot_Outstate_Pop,Tot_NonUSA_POP,Total_Median_Income_Individual,Native_Instate_Med_Inc_Indiv,Native_Outstate_Med_Inc_Indiv,Total_Pop_Poverty,Pop_Below100_Poverty,Pop_Below150_Poverty,Pop_Above150_Poverty,Age_under18,Age_18to64,Age_over64,Total_HH,HH_0Veh,HH_1Veh,HH_2Veh,HH_3Veh,HH_4+Veh,PCT_HH_NO_VEH,HH_MED_INC,HH_MEAN_INC,INC_U35,INC_35_100,INC_100P,TOT_UNEMP_MSA,TOT_EMP_MSA,TOT_LABOR_MSA,UNEMP_RATE_PCT,EMP_RATE_PCT,GasPrice,Area_acre,AVG_SPEED,2018_Dollar_Multiplier,TNC_ARRIVAL,TNC_FLAG,YEARS_SINCE_TNC,PROBLEM_FLAG,CBSA_log,NEW_CBSA_log,UPT_log,VRM_log,UPT_ADJ_log,VRM_ADJ_log,VRH_ADJ_log,FARE_TOTAL_log,FARE_per_UPT_log,AREALANDPT_log,Tot_Pop_log,Tot_Instate_Pop_log,Tot_Outstate_Pop_log,Tot_NonUSA_POP_log,Total_Median_Income_Individual_log,Native_Instate_Med_Inc_Indiv_log,Native_Outstate_Med_Inc_Indiv_log,Total_Pop_Poverty_log,Pop_Below100_Poverty_log,Pop_Below150_Poverty_log,Pop_Above150_Poverty_log,Age_under18_log,Age_18to64_log,Age_over64_log,Total_HH_log,HH_0Veh_log,HH_1Veh_log,HH_2Veh_log,HH_3Veh_log,HH_4+Veh_log,PCT_HH_NO_VEH_log,HH_MED_INC_log,HH_MEAN_INC_log,INC_U35_log,INC_35_100_log,INC_100P_log,TOT_UNEMP_MSA_log,TOT_EMP_MSA_log,TOT_LABOR_MSA_log,UNEMP_RATE_PCT_log,EMP_RATE_PCT_log,GasPrice_log,Area_acre_log,AVG_SPEED_log,2018_Dollar_Multiplier_log,TNC_ARRIVAL_log,TNC_FLAG_log,YEARS_SINCE_TNC_log,PROBLEM_FLAG_log
MNAME,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1
"Albuquerque, NM Metro Area",2009,10740,,1351087,813885,1351087.0,813885.0,20107.0,2669729.0,1.975986,648969769,856216.0,432274.0,329891.0,11065.0,25253.0,23216.0,30017.0,846650.0,15.9,10.4,73.7,24.8,63.1,12.1,334647.0,17776.0,120045.0,127430.0,47757.0,21639.0,5.31,46824.0,61059.0,36.7,46.6,16.7,31447.33,372776.25,404223.58,7.78,92.22,2.27,28089.6,40.48,1.17,2014,0,0,,9.281823,,14.116421,13.609576,14.116421,13.609576,9.908873,14.797488,1.090575,20.290897,13.660279,12.976817,12.706521,9.311633,10.13674,10.05264,10.309552,13.649044,2.827314,2.433613,4.31348,3.250374,4.160444,2.572612,12.720835,9.785661,11.69563,11.75533,10.773902,9.982299,1.842136,10.754173,11.019612,3.62966,3.862833,2.873565,10.356101,12.828736,12.909726,2.172476,4.534962,1.18479,10.24319,3.725211,0.774727,7.608374,0.0,0.0,
"Albuquerque, NM Metro Area",2010,10740,,1165687,1415143,1165687.0,1415143.0,37249.0,2846221.0,2.441668,648969769,892014.0,459673.0,332367.0,9139.0,25891.0,23628.0,31267.0,881703.0,,,,24.4,63.2,12.4,344800.0,16484.0,120481.0,136269.0,48104.0,23462.0,4.78,47383.0,63619.0,36.8,45.3,17.9,34020.5,390669.08,424689.58,8.01,91.99,2.7,28089.6,37.99,1.15,2014,0,0,,9.281823,,13.968822,14.162742,13.968822,14.162742,10.525407,14.861503,1.235956,20.290897,13.701238,13.038273,12.713998,9.120416,10.161689,10.07023,10.35035,13.689612,,,,3.234749,4.162003,2.595255,12.750723,9.710206,11.699256,11.822393,10.781141,10.06318,1.754404,10.76604,11.060683,3.632309,3.835142,2.939162,10.434748,12.875619,12.959116,2.198335,4.532492,1.308333,10.24319,3.663305,0.765468,7.608374,0.0,0.0,
"Albuquerque, NM Metro Area",2011,10740,,1242174,1370680,1242174.0,1370680.0,36676.0,2856263.0,2.299407,648969769,899149.0,466738.0,336918.0,9294.0,24924.0,21970.0,30564.0,887261.0,,,,24.4,63.0,12.6,345157.0,23644.0,121901.0,126939.0,52444.0,20229.0,6.85,45027.0,61912.0,40.2,41.9,17.9,31629.17,389437.25,421066.42,7.51,92.49,3.42,28089.6,37.37,1.12,2014,0,0,,9.281823,,14.032374,14.130818,14.032374,14.130818,10.509905,14.865025,1.193743,20.290897,13.709205,13.053525,12.727598,9.137232,10.123627,9.997479,10.327611,13.695896,,,,3.234749,4.158883,2.61007,12.751758,10.070907,11.710973,11.75147,10.86752,9.914922,2.060514,10.71504,11.033485,3.718438,3.758872,2.939162,10.361867,12.872461,12.950548,2.141242,4.537854,1.48614,10.24319,3.647276,0.751416,7.608374,0.0,0.0,
"Albuquerque, NM Metro Area",2012,10740,,1129303,1424748,1129303.0,1424748.0,36870.0,2616485.0,2.316903,648969769,902794.0,484743.0,320910.0,12335.0,25616.0,22332.0,31553.0,887938.0,,,,24.1,62.7,13.2,344869.0,21014.0,120959.0,130200.0,48806.0,23890.0,6.09,46725.0,62672.0,38.8,43.1,18.1,29752.5,387763.75,417516.25,7.13,92.87,3.49,28089.6,38.64,1.09,2014,0,0,,9.281823,,13.937112,14.169506,13.937112,14.169506,10.515181,14.777343,1.199031,20.290897,13.713251,13.091376,12.678919,9.420277,10.151011,10.013821,10.359456,13.696658,,,,3.222868,4.154185,2.653242,12.750923,9.952992,11.703215,11.776835,10.795629,10.081257,1.958685,10.752056,11.045686,3.683867,3.78646,2.949688,10.300702,12.868154,12.942081,2.095561,4.541911,1.501853,10.24319,3.679839,0.737164,7.608374,0.0,0.0,
"Albuquerque, NM Metro Area",2013,10740,,1082588,1388562,1082588.0,1388562.0,36503.0,3002928.0,2.773842,648969769,901932.0,472342.0,328386.0,11036.0,25717.0,22709.0,31723.0,890054.0,19.4,10.7,69.8,23.7,62.5,13.8,336614.0,18513.0,118331.0,125926.0,51657.0,22187.0,5.5,48355.0,64625.0,37.7,43.9,18.4,28250.83,387442.33,415693.17,6.8,93.2,3.37,28089.6,38.04,1.08,2014,0,0,,9.281823,,13.894866,14.14378,13.894866,14.14378,10.505177,14.915099,1.328094,20.290897,13.712296,13.065461,12.701948,9.309009,10.154946,10.030561,10.364829,13.699039,3.015535,2.459589,4.259859,3.206803,4.15104,2.694627,12.726695,9.826282,11.68125,11.743458,10.8524,10.007307,1.871802,10.786346,11.076372,3.65584,3.804438,2.965273,10.248914,12.867325,12.937705,2.054124,4.54542,1.474763,10.24319,3.664587,0.732368,7.608374,0.0,0.0,


In [16]:
# Bus panel model with entity effects (EntityEffects).
#   dependent : UPT_ADJ_log
#   regressors: VRM_ADJ_log, Tot_Pop_log, TOT_EMP_MSA_log, GasPrice_log
# The adjacent literals concatenate to the exact formula text used so far,
# including the runs of spaces between terms.
mod = PanelOLS.from_formula(
    'UPT_ADJ_log '
    '                    ~ VRM_ADJ_log '
    '                    + Tot_Pop_log '
    '                    + TOT_EMP_MSA_log '
    '                    + GasPrice_log '
    '                    + EntityEffects '
    '                    ',
    data=bus,
)
res = mod.fit()
print(res)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  new_df = df.copy().loc[self._full_index]
Inputs contain missing values. Dropping rows with missing observations.


                          PanelOLS Estimation Summary                           
Dep. Variable:            UPT_ADJ_log   R-squared:                        0.1866
Estimator:                   PanelOLS   R-squared (Between):              0.7625
No. Observations:                4504   R-squared (Within):               0.1866
Date:                Fri, Oct 25 2019   R-squared (Overall):              0.7587
Time:                        11:38:33   Log-likelihood                   -2187.9
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      240.21
Entities:                         313   P-value                           0.0000
Avg Obs:                       14.390   Distribution:                  F(4,4187)
Min Obs:                       1.0000                                           
Max Obs:                       17.000   F-statistic (robust):             240.21
                            

In [17]:
# Bus panel model with entity effects (EntityEffects).
#   dependent : UPT_ADJ_log
#   regressors: VRM_ADJ_log, Tot_Pop_log, UNEMP_RATE_PCT_log, GasPrice_log
# The adjacent literals concatenate to the exact formula text used so far,
# including the runs of spaces between terms.
mod = PanelOLS.from_formula(
    'UPT_ADJ_log '
    '                    ~ VRM_ADJ_log '
    '                    + Tot_Pop_log '
    '                    + UNEMP_RATE_PCT_log '
    '                    + GasPrice_log '
    '                    + EntityEffects '
    '                    ',
    data=bus,
)
res = mod.fit()
print(res)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  new_df = df.copy().loc[self._full_index]
Inputs contain missing values. Dropping rows with missing observations.


                          PanelOLS Estimation Summary                           
Dep. Variable:            UPT_ADJ_log   R-squared:                        0.1885
Estimator:                   PanelOLS   R-squared (Between):              0.5304
No. Observations:                4504   R-squared (Within):               0.1885
Date:                Fri, Oct 25 2019   R-squared (Overall):              0.5272
Time:                        11:40:00   Log-likelihood                   -2182.7
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      243.18
Entities:                         313   P-value                           0.0000
Avg Obs:                       14.390   Distribution:                  F(4,4187)
Min Obs:                       1.0000                                           
Max Obs:                       17.000   F-statistic (robust):             243.18
                            

In [18]:
# Bus panel model with entity effects (EntityEffects).
#   dependent : UPT_ADJ_log
#   regressors: VRM_ADJ_log, Total_HH_log, UNEMP_RATE_PCT_log, GasPrice_log
# The adjacent literals concatenate to the exact formula text used so far,
# including the runs of spaces between terms.
mod = PanelOLS.from_formula(
    'UPT_ADJ_log '
    '                    ~ VRM_ADJ_log '
    '                    + Total_HH_log '
    '                    + UNEMP_RATE_PCT_log '
    '                    + GasPrice_log '
    '                    + EntityEffects '
    '                    ',
    data=bus,
)
res = mod.fit()
print(res)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  new_df = df.copy().loc[self._full_index]
Inputs contain missing values. Dropping rows with missing observations.


                          PanelOLS Estimation Summary                           
Dep. Variable:            UPT_ADJ_log   R-squared:                        0.1885
Estimator:                   PanelOLS   R-squared (Between):              0.5272
No. Observations:                4504   R-squared (Within):               0.1885
Date:                Fri, Oct 25 2019   R-squared (Overall):              0.5240
Time:                        11:40:49   Log-likelihood                   -2182.8
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      243.10
Entities:                         313   P-value                           0.0000
Avg Obs:                       14.390   Distribution:                  F(4,4187)
Min Obs:                       1.0000                                           
Max Obs:                       17.000   F-statistic (robust):             243.10
                            

In [19]:
# Bus panel model with entity effects (EntityEffects).
#   dependent : UPT_ADJ_log
#   regressors: VRM_ADJ_log, Tot_Pop_log, UNEMP_RATE_PCT_log,
#               PCT_HH_NO_VEH_log, GasPrice_log
# The adjacent literals concatenate to the exact formula text used so far,
# including the runs of spaces between terms.
mod = PanelOLS.from_formula(
    'UPT_ADJ_log '
    '                    ~ VRM_ADJ_log '
    '                    + Tot_Pop_log '
    '                    + UNEMP_RATE_PCT_log '
    '                    + PCT_HH_NO_VEH_log '
    '                    + GasPrice_log '
    '                    + EntityEffects '
    '                    ',
    data=bus,
)
res = mod.fit()
print(res)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  new_df = df.copy().loc[self._full_index]
Inputs contain missing values. Dropping rows with missing observations.


                          PanelOLS Estimation Summary                           
Dep. Variable:            UPT_ADJ_log   R-squared:                        0.1886
Estimator:                   PanelOLS   R-squared (Between):              0.5301
No. Observations:                4499   R-squared (Within):               0.1886
Date:                Fri, Oct 25 2019   R-squared (Overall):              0.5269
Time:                        11:41:43   Log-likelihood                   -2179.2
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      194.39
Entities:                         313   P-value                           0.0000
Avg Obs:                       14.374   Distribution:                  F(5,4181)
Min Obs:                       1.0000                                           
Max Obs:                       17.000   F-statistic (robust):             194.39
                            

In [22]:
# Bus panel model with entity effects (EntityEffects).
#   dependent : UPT_ADJ_log
#   regressors: VRM_ADJ_log, FARE_per_UPT_log, Tot_Pop_log,
#               UNEMP_RATE_PCT_log, GasPrice_log
# The adjacent literals concatenate to the exact formula text used so far,
# including the runs of spaces between terms.
mod = PanelOLS.from_formula(
    'UPT_ADJ_log '
    '                    ~ VRM_ADJ_log '
    '                    + FARE_per_UPT_log '
    '                    + Tot_Pop_log '
    '                    + UNEMP_RATE_PCT_log '
    '                    + GasPrice_log '
    '                    + EntityEffects '
    '                    ',
    data=bus,
)
res = mod.fit()
print(res)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  new_df = df.copy().loc[self._full_index]
Inputs contain missing values. Dropping rows with missing observations.


                          PanelOLS Estimation Summary                           
Dep. Variable:            UPT_ADJ_log   R-squared:                        0.4857
Estimator:                   PanelOLS   R-squared (Between):              0.4763
No. Observations:                4504   R-squared (Within):               0.4857
Date:                Fri, Oct 25 2019   R-squared (Overall):              0.4731
Time:                        11:43:38   Log-likelihood                   -1155.8
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      790.58
Entities:                         313   P-value                           0.0000
Avg Obs:                       14.390   Distribution:                  F(5,4186)
Min Obs:                       1.0000                                           
Max Obs:                       17.000   F-statistic (robust):             790.58
                            

In [23]:
# Bus panel model with entity effects (EntityEffects).
#   dependent : UPT_ADJ_log
#   regressors: VRM_ADJ_log, FARE_per_UPT_log, Tot_Pop_log,
#               TOT_EMP_MSA_log, GasPrice_log
# The adjacent literals concatenate to the exact formula text used so far,
# including the runs of spaces between terms.
mod = PanelOLS.from_formula(
    'UPT_ADJ_log '
    '                    ~ VRM_ADJ_log '
    '                    + FARE_per_UPT_log '
    '                    + Tot_Pop_log '
    '                    + TOT_EMP_MSA_log '
    '                    + GasPrice_log '
    '                    + EntityEffects '
    '                    ',
    data=bus,
)
res = mod.fit()
print(res)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  new_df = df.copy().loc[self._full_index]
Inputs contain missing values. Dropping rows with missing observations.


                          PanelOLS Estimation Summary                           
Dep. Variable:            UPT_ADJ_log   R-squared:                        0.4858
Estimator:                   PanelOLS   R-squared (Between):              0.8556
No. Observations:                4504   R-squared (Within):               0.4858
Date:                Fri, Oct 25 2019   R-squared (Overall):              0.8521
Time:                        11:44:22   Log-likelihood                   -1155.2
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      791.01
Entities:                         313   P-value                           0.0000
Avg Obs:                       14.390   Distribution:                  F(5,4186)
Min Obs:                       1.0000                                           
Max Obs:                       17.000   F-statistic (robust):             791.01
                            

In [27]:
# Bus panel model with entity effects (EntityEffects).
#   dependent : UPT_ADJ_log
#   regressors: VRM_ADJ_log, FARE_per_UPT_log, Tot_Pop_log,
#               TOT_EMP_MSA_log, GasPrice_log, TNC_FLAG
# TNC_FLAG enters in levels (no `_log` suffix).
# The adjacent literals concatenate to the exact formula text used so far,
# including the runs of spaces between terms.
mod = PanelOLS.from_formula(
    'UPT_ADJ_log '
    '                    ~ VRM_ADJ_log '
    '                    + FARE_per_UPT_log '
    '                    + Tot_Pop_log '
    '                    + TOT_EMP_MSA_log '
    '                    + GasPrice_log '
    '                    + TNC_FLAG '
    '                    + EntityEffects '
    '                    ',
    data=bus,
)
res = mod.fit()
print(res)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  new_df = df.copy().loc[self._full_index]
Inputs contain missing values. Dropping rows with missing observations.


                          PanelOLS Estimation Summary                           
Dep. Variable:            UPT_ADJ_log   R-squared:                        0.4865
Estimator:                   PanelOLS   R-squared (Between):              0.7661
No. Observations:                4504   R-squared (Within):               0.4865
Date:                Fri, Oct 25 2019   R-squared (Overall):              0.7623
Time:                        13:40:48   Log-likelihood                   -1151.9
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      660.95
Entities:                         313   P-value                           0.0000
Avg Obs:                       14.390   Distribution:                  F(6,4185)
Min Obs:                       1.0000                                           
Max Obs:                       17.000   F-statistic (robust):             660.95
                            

In [26]:
# Bus panel model with entity effects (EntityEffects).
#   dependent : UPT_ADJ_log
#   regressors: VRM_ADJ_log, FARE_per_UPT_log, Tot_Pop_log,
#               TOT_EMP_MSA_log, GasPrice_log, YEARS_SINCE_TNC
# YEARS_SINCE_TNC enters in levels (no `_log` suffix).
# The adjacent literals concatenate to the exact formula text used so far,
# including the runs of spaces between terms.
mod = PanelOLS.from_formula(
    'UPT_ADJ_log '
    '                    ~ VRM_ADJ_log '
    '                    + FARE_per_UPT_log '
    '                    + Tot_Pop_log '
    '                    + TOT_EMP_MSA_log '
    '                    + GasPrice_log '
    '                    + YEARS_SINCE_TNC '
    '                    + EntityEffects '
    '                    ',
    data=bus,
)
res = mod.fit()
print(res)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  new_df = df.copy().loc[self._full_index]
Inputs contain missing values. Dropping rows with missing observations.


                          PanelOLS Estimation Summary                           
Dep. Variable:            UPT_ADJ_log   R-squared:                        0.4873
Estimator:                   PanelOLS   R-squared (Between):              0.9507
No. Observations:                4504   R-squared (Within):               0.4873
Date:                Fri, Oct 25 2019   R-squared (Overall):              0.9483
Time:                        13:40:21   Log-likelihood                   -1148.6
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      662.98
Entities:                         313   P-value                           0.0000
Avg Obs:                       14.390   Distribution:                  F(6,4185)
Min Obs:                       1.0000                                           
Max Obs:                       17.000   F-statistic (robust):             662.98
                            

In [28]:
# Bus panel model with entity effects (EntityEffects).
#   dependent : UPT_ADJ_log
#   regressors: VRM_ADJ_log, AVG_SPEED_log, FARE_per_UPT_log, Tot_Pop_log,
#               TOT_EMP_MSA_log, GasPrice_log, YEARS_SINCE_TNC
# YEARS_SINCE_TNC enters in levels (no `_log` suffix).
# The adjacent literals concatenate to the exact formula text used so far,
# including the runs of spaces between terms.
mod = PanelOLS.from_formula(
    'UPT_ADJ_log '
    '                    ~ VRM_ADJ_log '
    '                    + AVG_SPEED_log '
    '                    + FARE_per_UPT_log '
    '                    + Tot_Pop_log '
    '                    + TOT_EMP_MSA_log '
    '                    + GasPrice_log '
    '                    + YEARS_SINCE_TNC '
    '                    + EntityEffects '
    '                    ',
    data=bus,
)
res = mod.fit()
print(res)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  new_df = df.copy().loc[self._full_index]
Inputs contain missing values. Dropping rows with missing observations.


                          PanelOLS Estimation Summary                           
Dep. Variable:            UPT_ADJ_log   R-squared:                        0.5042
Estimator:                   PanelOLS   R-squared (Between):              0.9259
No. Observations:                4504   R-squared (Within):               0.5042
Date:                Fri, Oct 25 2019   R-squared (Overall):              0.9235
Time:                        13:41:30   Log-likelihood                   -1072.9
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      607.96
Entities:                         313   P-value                           0.0000
Avg Obs:                       14.390   Distribution:                  F(7,4184)
Min Obs:                       1.0000                                           
Max Obs:                       17.000   F-statistic (robust):             607.96
                            

In [29]:
# Bus panel model with entity effects (EntityEffects).
#   dependent : UPT_ADJ_log
#   regressors: VRM_ADJ_log, FARE_per_UPT_log, Tot_Pop_log,
#               TOT_EMP_MSA_log, GasPrice_log, YEARS_SINCE_TNC
# YEARS_SINCE_TNC enters in levels (no `_log` suffix).
# NOTE(review): this formula is identical to the YEARS_SINCE_TNC model fitted
# earlier in the notebook (the one without AVG_SPEED_log) -- confirm whether
# the repeat is intentional.
# The adjacent literals concatenate to the exact formula text used so far,
# including the runs of spaces between terms.
mod = PanelOLS.from_formula(
    'UPT_ADJ_log '
    '                    ~ VRM_ADJ_log '
    '                    + FARE_per_UPT_log '
    '                    + Tot_Pop_log '
    '                    + TOT_EMP_MSA_log '
    '                    + GasPrice_log '
    '                    + YEARS_SINCE_TNC '
    '                    + EntityEffects '
    '                    ',
    data=bus,
)
res = mod.fit()
print(res)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  new_df = df.copy().loc[self._full_index]
Inputs contain missing values. Dropping rows with missing observations.


                          PanelOLS Estimation Summary                           
Dep. Variable:            UPT_ADJ_log   R-squared:                        0.4873
Estimator:                   PanelOLS   R-squared (Between):              0.9507
No. Observations:                4504   R-squared (Within):               0.4873
Date:                Fri, Oct 25 2019   R-squared (Overall):              0.9483
Time:                        13:42:46   Log-likelihood                   -1148.6
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      662.98
Entities:                         313   P-value                           0.0000
Avg Obs:                       14.390   Distribution:                  F(6,4185)
Min Obs:                       1.0000                                           
Max Obs:                       17.000   F-statistic (robust):             662.98
                            

In [31]:
# Bus panel model with entity effects (EntityEffects).
#   dependent : UPT_ADJ_log
#   regressors: VRM_ADJ_log, FARE_per_UPT_log, Tot_Pop_log, TOT_EMP_MSA_log,
#               GasPrice_log, PCT_HH_NO_VEH, YEARS_SINCE_TNC
# NOTE(review): PCT_HH_NO_VEH and YEARS_SINCE_TNC enter in levels; a
# PCT_HH_NO_VEH_log column also exists -- confirm the level form is intended.
# The adjacent literals concatenate to the exact formula text used so far,
# including the runs of spaces between terms.
mod = PanelOLS.from_formula(
    'UPT_ADJ_log '
    '                    ~ VRM_ADJ_log '
    '                    + FARE_per_UPT_log '
    '                    + Tot_Pop_log '
    '                    + TOT_EMP_MSA_log '
    '                    + GasPrice_log '
    '                    + PCT_HH_NO_VEH '
    '                    + YEARS_SINCE_TNC '
    '                    + EntityEffects '
    '                    ',
    data=bus,
)
res = mod.fit()
print(res)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  new_df = df.copy().loc[self._full_index]
Inputs contain missing values. Dropping rows with missing observations.


                          PanelOLS Estimation Summary                           
Dep. Variable:            UPT_ADJ_log   R-squared:                        0.4879
Estimator:                   PanelOLS   R-squared (Between):              0.9593
No. Observations:                4504   R-squared (Within):               0.4879
Date:                Fri, Oct 25 2019   R-squared (Overall):              0.9572
Time:                        13:44:46   Log-likelihood                   -1146.0
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      569.46
Entities:                         313   P-value                           0.0000
Avg Obs:                       14.390   Distribution:                  F(7,4184)
Min Obs:                       1.0000                                           
Max Obs:                       17.000   F-statistic (robust):             569.46
                            