In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel

In [2]:
# Add warehouse data
entry3_data = pd.read_csv("entry_loc3_w_filter.csv")
entry3_data = entry3_data.dropna()
entry3_data['HD entry'] = 1*(entry3_data['HD'] > 0)
entry3_data['LOW entry'] = 1*(entry3_data['LOW'] > 0)
entry3_data['log income'] = np.log(1 + entry3_data['income_per_capita'])
entry3_data['log population'] = np.log(1 + entry3_data['population'])
entry3_data['log hd warehouse'] = np.log(1 + entry3_data['hd warehouse distance'])
entry3_data['log low warehouse'] = np.log(1 + entry3_data['low warehouse distance'])
# entry3_data = entry3_data[(entry3_data['lon'] <= -30) & (entry3_data['lat'] >= 25) ]

In [3]:
entry3_data['interaction1'] = entry3_data['LOW entry'] * entry3_data['log income']
entry3_data['interaction2'] = entry3_data['HD entry'] * entry3_data['log income']

In [4]:
# Model fit for HD stores
x = entry3_data[['log income','log population','log hd warehouse','LOW entry', 'interaction2']].copy()
y = entry3_data['HD entry']
model1 = sm.OLS(y,sm.add_constant(x)).fit()
print(model1.summary())
#print(model1.summary().as_latex())

                            OLS Regression Results                            
Dep. Variable:               HD entry   R-squared:                       0.999
Model:                            OLS   Adj. R-squared:                  0.999
Method:                 Least Squares   F-statistic:                 2.941e+05
Date:                Thu, 12 Aug 2021   Prob (F-statistic):               0.00
Time:                        14:10:32   Log-Likelihood:                 2680.3
No. Observations:                 896   AIC:                            -5349.
Df Residuals:                     890   BIC:                            -5320.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                0.5027      0.017  

In [6]:
x = entry3_data[['log income','log population','log hd warehouse','LOW entry']].copy()
y = entry3_data['HD entry']
model1 = sm.Logit(y,sm.add_constant(x)).fit()
print(model1.summary())
#print(model1.summary().as_latex())

Optimization terminated successfully.
         Current function value: 0.510998
         Iterations 6
                           Logit Regression Results                           
Dep. Variable:               HD entry   No. Observations:                  896
Model:                          Logit   Df Residuals:                      891
Method:                           MLE   Df Model:                            4
Date:                Thu, 12 Aug 2021   Pseudo R-squ.:                  0.2499
Time:                        14:10:55   Log-Likelihood:                -457.85
converged:                       True   LL-Null:                       -610.39
Covariance Type:            nonrobust   LLR p-value:                 8.722e-65
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
const              -21.8678      3.524     -6.206      0.000     -28.775     -14.961
log income 

In [8]:
# Model fit for HD stores
x = entry3_data[['log income','log population','log hd warehouse','LOW','interaction2']].copy()
y = entry3_data['HD']
model1 = sm.OLS(y,sm.add_constant(x)).fit()
model1.summary()

0,1,2,3
Dep. Variable:,HD,R-squared:,0.704
Model:,OLS,Adj. R-squared:,0.703
Method:,Least Squares,F-statistic:,424.0
Date:,"Thu, 12 Aug 2021",Prob (F-statistic):,1.38e-232
Time:,14:11:17,Log-Likelihood:,-802.89
No. Observations:,896,AIC:,1618.0
Df Residuals:,890,BIC:,1647.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-2.1266,0.820,-2.595,0.010,-3.735,-0.518
log income,0.0361,0.086,0.417,0.676,-0.134,0.206
log population,0.1586,0.024,6.696,0.000,0.112,0.205
log hd warehouse,0.0023,0.018,0.128,0.898,-0.033,0.037
LOW,0.5290,0.027,19.733,0.000,0.476,0.582
interaction2,0.0897,0.005,19.018,0.000,0.080,0.099

0,1,2,3
Omnibus:,673.454,Durbin-Watson:,1.973
Prob(Omnibus):,0.0,Jarque-Bera (JB):,19732.539
Skew:,3.097,Prob(JB):,0.0
Kurtosis:,25.14,Cond. No.,672.0


In [9]:
# Model fit for LO stores
x = entry3_data[['log income','log population','log low warehouse','HD entry','interaction1']].copy()
y = entry3_data['LOW entry']
model1 = sm.regression.linear_model.OLS(y,sm.add_constant(x)).fit()
print(model1.summary())
#print(model1.summary().as_latex())

                            OLS Regression Results                            
Dep. Variable:              LOW entry   R-squared:                       0.999
Model:                            OLS   Adj. R-squared:                  0.999
Method:                 Least Squares   F-statistic:                 2.837e+05
Date:                Thu, 12 Aug 2021   Prob (F-statistic):               0.00
Time:                        14:11:21   Log-Likelihood:                 2653.5
No. Observations:                 896   AIC:                            -5295.
Df Residuals:                     890   BIC:                            -5266.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                 0.5095      0.01

In [10]:
x = entry3_data[['log income','log population','log low warehouse','HD entry']].copy()
y = entry3_data['LOW entry']
model1 = sm.Logit(y,sm.add_constant(x)).fit()    # doesn't work when interaction term in model
model1.summary()
#print(model1.summary().as_latex())

Optimization terminated successfully.
         Current function value: 0.568007
         Iterations 6


0,1,2,3
Dep. Variable:,LOW entry,No. Observations:,896.0
Model:,Logit,Df Residuals:,891.0
Method:,MLE,Df Model:,4.0
Date:,"Thu, 12 Aug 2021",Pseudo R-squ.:,0.1805
Time:,14:11:36,Log-Likelihood:,-508.93
converged:,True,LL-Null:,-621.0
Covariance Type:,nonrobust,LLR p-value:,2.41e-47

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-5.2183,3.229,-1.616,0.106,-11.548,1.111
log income,-0.2910,0.340,-0.856,0.392,-0.957,0.375
log population,0.8737,0.097,9.007,0.000,0.684,1.064
log low warehouse,-0.1828,0.073,-2.500,0.012,-0.326,-0.039
HD entry,0.7005,0.172,4.076,0.000,0.364,1.037


In [11]:
# Model fit for LOW stores
x = entry3_data[['log income','log population','log low warehouse','HD','interaction1']].copy()
y = entry3_data['LOW']
model1 = sm.regression.linear_model.OLS(y,sm.add_constant(x)).fit()
model1.summary()

0,1,2,3
Dep. Variable:,LOW,R-squared:,0.755
Model:,OLS,Adj. R-squared:,0.753
Method:,Least Squares,F-statistic:,548.0
Date:,"Thu, 12 Aug 2021",Prob (F-statistic):,9.49e-269
Time:,14:11:42,Log-Likelihood:,-552.48
No. Observations:,896,AIC:,1117.0
Df Residuals:,890,BIC:,1146.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,1.8366,0.616,2.980,0.003,0.627,3.046
log income,-0.1653,0.065,-2.534,0.011,-0.293,-0.037
log population,-0.0016,0.019,-0.086,0.932,-0.038,0.035
log low warehouse,-0.0450,0.014,-3.225,0.001,-0.072,-0.018
HD,0.4395,0.018,24.261,0.000,0.404,0.475
interaction1,0.0969,0.003,29.083,0.000,0.090,0.103

0,1,2,3
Omnibus:,655.415,Durbin-Watson:,1.945
Prob(Omnibus):,0.0,Jarque-Bera (JB):,30205.792
Skew:,2.798,Prob(JB):,0.0
Kurtosis:,30.889,Cond. No.,687.0


In [13]:
# Model fit for LO stores
x = entry3_data[['log income','log population','log low warehouse','HD entry','Northeast_x', 'Midwest_x', 'South_x']].copy()
y = entry3_data['LOW entry']
model1 = sm.Logit(y,sm.add_constant(x)).fit()    # doesn't work when interaction term in model
model1.summary()
#print(model1.summary().as_latex())

Optimization terminated successfully.
         Current function value: 0.533086
         Iterations 6


0,1,2,3
Dep. Variable:,LOW entry,No. Observations:,896.0
Model:,Logit,Df Residuals:,888.0
Method:,MLE,Df Model:,7.0
Date:,"Thu, 12 Aug 2021",Pseudo R-squ.:,0.2309
Time:,14:12:00,Log-Likelihood:,-477.64
converged:,True,LL-Null:,-621.0
Covariance Type:,nonrobust,LLR p-value:,4.14e-58

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-11.7237,3.526,-3.325,0.001,-18.635,-4.812
log income,0.1669,0.362,0.461,0.645,-0.542,0.876
log population,0.9422,0.103,9.170,0.000,0.741,1.144
log low warehouse,-0.0947,0.077,-1.226,0.220,-0.246,0.057
HD entry,0.7015,0.182,3.850,0.000,0.344,1.059
Northeast_x,0.2848,0.317,0.900,0.368,-0.336,0.905
Midwest_x,0.1544,0.244,0.634,0.526,-0.323,0.632
South_x,1.4330,0.246,5.831,0.000,0.951,1.915


In [14]:
# Model fit for HD stores
x = entry3_data[['log income','log population','log low warehouse','LOW entry','Northeast_x', 'Midwest_x', 'South_x']].copy()
y = entry3_data['HD entry']
model1 = sm.Logit(y,sm.add_constant(x)).fit()    # doesn't work when interaction term in model
model1.summary()
#print(model1.summary().as_latex())

Optimization terminated successfully.
         Current function value: 0.499085
         Iterations 6


0,1,2,3
Dep. Variable:,HD entry,No. Observations:,896.0
Model:,Logit,Df Residuals:,888.0
Method:,MLE,Df Model:,7.0
Date:,"Thu, 12 Aug 2021",Pseudo R-squ.:,0.2674
Time:,14:12:11,Log-Likelihood:,-447.18
converged:,True,LL-Null:,-610.39
Covariance Type:,nonrobust,LLR p-value:,1.367e-66

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-20.3862,3.607,-5.651,0.000,-27.456,-13.316
log income,0.9496,0.366,2.595,0.009,0.233,1.667
log population,1.0116,0.101,9.977,0.000,0.813,1.210
log low warehouse,0.0442,0.076,0.577,0.564,-0.106,0.194
LOW entry,0.6562,0.184,3.570,0.000,0.296,1.016
Northeast_x,-0.5572,0.322,-1.730,0.084,-1.189,0.074
Midwest_x,-1.0919,0.247,-4.429,0.000,-1.575,-0.609
South_x,-0.5648,0.247,-2.285,0.022,-1.049,-0.080
