In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel

In [3]:
# Add warehouse data
entry2_data = pd.read_csv("entry_loc2.csv")
entry2_data = entry2_data.dropna()
entry2_data['HD entry'] = 1*(entry2_data['HD'] > 0)
entry2_data['LOW entry'] = 1*(entry2_data['LOW'] > 0)
entry2_data['log income'] = np.log(1 + entry2_data['income_per_capita'])
entry2_data['log population'] = np.log(1 + entry2_data['population'])
entry2_data['log hd warehouse'] = np.log(1 + entry2_data['hd warehouse distance'])
entry2_data['log low warehouse'] = np.log(1 + entry2_data['low warehouse distance'])
entry2_data = entry2_data[(entry2_data['population']>=2000) & (entry2_data['income_per_capita']>=15000)]
entry2_data = entry2_data[(entry2_data['lon'] <= -30) & (entry2_data['lat'] >= 25) ]

In [30]:
entry2_data = entry2_data[entry2_data['population']>10000]
entry2_data['interaction1'] = entry2_data['LOW entry'] * entry2_data['log income']
entry2_data['interaction2'] = entry2_data['HD entry'] * entry2_data['log income']

In [39]:
# Model fit for HD stores
x = entry2_data[['log income','log population','log hd warehouse','LOW entry', 'interaction2']].copy()
y = entry2_data['HD entry']
model1 = sm.OLS(y,sm.add_constant(x)).fit()
print(model1.summary())
#print(model1.summary().as_latex())

                            OLS Regression Results                            
Dep. Variable:               HD entry   R-squared:                       0.999
Model:                            OLS   Adj. R-squared:                  0.999
Method:                 Least Squares   F-statistic:                 6.086e+05
Date:                Thu, 01 Jul 2021   Prob (F-statistic):               0.00
Time:                        14:11:09   Log-Likelihood:                 8256.9
No. Observations:                2952   AIC:                        -1.650e+04
Df Residuals:                    2946   BIC:                        -1.647e+04
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                0.2479      0.009  

In [40]:
model1 = sm.Logit(y,sm.add_constant(x)).fit()
print(model1.summary())
#print(model1.summary().as_latex())

PerfectSeparationError: Perfect separation detected, results not available

In [41]:
# Model fit for HD stores
x = entry2_data[['log income','log population','log hd warehouse','LOW', 'interaction2']].copy()
y = entry2_data['HD']
model1 = sm.OLS(y,sm.add_constant(x)).fit()
model1.summary()

0,1,2,3
Dep. Variable:,HD,R-squared:,0.628
Model:,OLS,Adj. R-squared:,0.628
Method:,Least Squares,F-statistic:,996.0
Date:,"Thu, 01 Jul 2021",Prob (F-statistic):,0.0
Time:,14:11:11,Log-Likelihood:,-2412.3
No. Observations:,2952,AIC:,4837.0
Df Residuals:,2946,BIC:,4873.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.8735,0.328,-2.665,0.008,-1.516,-0.231
log income,0.0027,0.029,0.093,0.926,-0.055,0.060
log population,0.0905,0.013,6.864,0.000,0.065,0.116
log hd warehouse,-0.0341,0.009,-3.770,0.000,-0.052,-0.016
LOW,0.5108,0.017,30.162,0.000,0.478,0.544
interaction2,0.0931,0.002,39.774,0.000,0.088,0.098

0,1,2,3
Omnibus:,4915.56,Durbin-Watson:,1.763
Prob(Omnibus):,0.0,Jarque-Bera (JB):,6175416.079
Skew:,10.812,Prob(JB):,0.0
Kurtosis:,226.023,Cond. No.,516.0


In [42]:
# Model fit for LO stores
x = entry2_data[['log income','log population','log low warehouse','HD entry','interaction1']].copy()
y = entry2_data['LOW entry']
model1 = sm.regression.linear_model.OLS(y,sm.add_constant(x)).fit()
print(model1.summary())
#print(model1.summary().as_latex())

                            OLS Regression Results                            
Dep. Variable:              LOW entry   R-squared:                       0.999
Model:                            OLS   Adj. R-squared:                  0.999
Method:                 Least Squares   F-statistic:                 7.014e+05
Date:                Thu, 01 Jul 2021   Prob (F-statistic):               0.00
Time:                        14:11:12   Log-Likelihood:                 8530.8
No. Observations:                2952   AIC:                        -1.705e+04
Df Residuals:                    2946   BIC:                        -1.701e+04
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                 0.2061      0.00

In [43]:
model1 = sm.Logit(y,sm.add_constant(x)).fit()    # doesn't work when interaction term in model
model1.summary()
#print(model1.summary().as_latex())

PerfectSeparationError: Perfect separation detected, results not available

In [44]:
# Model fit for LOW stores
x = entry2_data[['log income','log population','log low warehouse','HD','interaction1']].copy()
y = entry2_data['LOW']
model1 = sm.regression.linear_model.OLS(y,sm.add_constant(x)).fit()
model1.summary()

0,1,2,3
Dep. Variable:,LOW,R-squared:,0.777
Model:,OLS,Adj. R-squared:,0.776
Method:,Least Squares,F-statistic:,2051.0
Date:,"Thu, 01 Jul 2021",Prob (F-statistic):,0.0
Time:,14:11:17,Log-Likelihood:,-844.63
No. Observations:,2952,AIC:,1701.0
Df Residuals:,2946,BIC:,1737.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0271,0.189,0.143,0.886,-0.344,0.398
log income,-0.0263,0.017,-1.548,0.122,-0.060,0.007
log population,0.0224,0.008,2.907,0.004,0.007,0.038
log low warehouse,-0.0084,0.006,-1.382,0.167,-0.020,0.004
HD,0.2749,0.008,35.768,0.000,0.260,0.290
interaction1,0.0959,0.001,69.984,0.000,0.093,0.099

0,1,2,3
Omnibus:,3744.956,Durbin-Watson:,1.766
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1149416.257
Skew:,6.615,Prob(JB):,0.0
Kurtosis:,98.759,Cond. No.,512.0


In [48]:
# Model fit for LO stores
x = entry2_data[['log income','log population','log low warehouse','HD entry','Northeast', 'Midwest', 'South']].copy()
y = entry2_data['LOW entry']
model1 = sm.Logit(y,sm.add_constant(x)).fit()    # doesn't work when interaction term in model
model1.summary()
#print(model1.summary().as_latex())

Optimization terminated successfully.
         Current function value: 0.495107
         Iterations 6


0,1,2,3
Dep. Variable:,LOW entry,No. Observations:,2952.0
Model:,Logit,Df Residuals:,2944.0
Method:,MLE,Df Model:,7.0
Date:,"Thu, 01 Jul 2021",Pseudo R-squ.:,0.2047
Time:,14:55:53,Log-Likelihood:,-1461.6
converged:,True,LL-Null:,-1837.8
Covariance Type:,nonrobust,LLR p-value:,3.445e-158

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.9909,1.580,0.627,0.530,-2.105,4.087
log income,-0.9902,0.146,-6.791,0.000,-1.276,-0.704
log population,0.6649,0.058,11.468,0.000,0.551,0.778
log low warehouse,0.0750,0.048,1.557,0.119,-0.019,0.169
HD entry,1.3816,0.098,14.114,0.000,1.190,1.573
Northeast,-0.0333,0.181,-0.184,0.854,-0.388,0.321
Midwest,0.2171,0.137,1.588,0.112,-0.051,0.485
South,1.2561,0.127,9.875,0.000,1.007,1.505


In [49]:
# Model fit for HD stores
x = entry2_data[['log income','log population','log low warehouse','LOW entry','Northeast', 'Midwest', 'South']].copy()
y = entry2_data['HD entry']
model1 = sm.Logit(y,sm.add_constant(x)).fit()    # doesn't work when interaction term in model
model1.summary()
#print(model1.summary().as_latex())

Optimization terminated successfully.
         Current function value: 0.514880
         Iterations 6


0,1,2,3
Dep. Variable:,HD entry,No. Observations:,2952.0
Model:,Logit,Df Residuals:,2944.0
Method:,MLE,Df Model:,7.0
Date:,"Thu, 01 Jul 2021",Pseudo R-squ.:,0.1988
Time:,14:56:02,Log-Likelihood:,-1519.9
converged:,True,LL-Null:,-1897.2
Covariance Type:,nonrobust,LLR p-value:,1.246e-158

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-10.3008,1.489,-6.918,0.000,-13.219,-7.382
log income,-0.0317,0.131,-0.242,0.808,-0.288,0.225
log population,0.9161,0.058,15.694,0.000,0.802,1.031
log low warehouse,0.0314,0.047,0.673,0.501,-0.060,0.123
LOW entry,1.3580,0.098,13.845,0.000,1.166,1.550
Northeast,-0.2009,0.162,-1.242,0.214,-0.518,0.116
Midwest,-0.3065,0.128,-2.397,0.017,-0.557,-0.056
South,-0.2451,0.125,-1.961,0.050,-0.490,-0.000
