In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel

In [2]:
# Build the estimation sample in one pass:
#   1. read the raw entry file and drop rows with any missing value,
#   2. derive the 0/1 entry indicators and the log(1 + x) regressors,
#   3. keep only rows that satisfy the population, income, longitude and latitude cut-offs.
entry2_data = (
    pd.read_csv("entry_loc2.csv")
    .dropna()
    .assign(**{
        # Indicator is 1 when the corresponding count column is strictly positive.
        'HD entry': lambda frame: (frame['HD'] > 0) * 1,
        'LOW entry': lambda frame: (frame['LOW'] > 0) * 1,
        # log(1 + x) keeps zero-valued inputs finite.
        'log income': lambda frame: np.log(1 + frame['income_per_capita']),
        'log population': lambda frame: np.log(1 + frame['population']),
        'log hd warehouse': lambda frame: np.log(1 + frame['hd warehouse distance']),
        'log low warehouse': lambda frame: np.log(1 + frame['low warehouse distance']),
    })
    .loc[lambda frame: (
        (frame['population'] >= 2000)
        & (frame['income_per_capita'] >= 15000)
        & (frame['lon'] <= -30)
        & (frame['lat'] >= 25)
    )]
)

In [3]:
# Linear probability model for HD entry: regress the 0/1 HD entry indicator
# on income, population, HD warehouse distance (all in logs) and the LOW entry indicator.
# NOTE(review): the outcome is binary and .fit() uses the default covariance;
# consider heteroskedasticity-robust standard errors - confirm with the author.
hd_entry_regressors = ['log income', 'log population', 'log hd warehouse', 'LOW entry']
x = entry2_data.loc[:, hd_entry_regressors].copy()
y = entry2_data.loc[:, 'HD entry']
model1 = sm.OLS(endog=y, exog=sm.add_constant(x)).fit()
# Emit the summary as LaTeX source so it can be pasted into a write-up.
print(model1.summary().as_latex())

\begin{center}
\begin{tabular}{lclc}
\toprule
\textbf{Dep. Variable:}    &     HD entry     & \textbf{  R-squared:         } &     0.322   \\
\textbf{Model:}            &       OLS        & \textbf{  Adj. R-squared:    } &     0.322   \\
\textbf{Method:}           &  Least Squares   & \textbf{  F-statistic:       } &     829.8   \\
\textbf{Date:}             & Wed, 16 Jun 2021 & \textbf{  Prob (F-statistic):} &     0.00    \\
\textbf{Time:}             &     12:36:51     & \textbf{  Log-Likelihood:    } &   -1674.5   \\
\textbf{No. Observations:} &        6987      & \textbf{  AIC:               } &     3359.   \\
\textbf{Df Residuals:}     &        6982      & \textbf{  BIC:               } &     3393.   \\
\textbf{Df Model:}         &           4      & \textbf{                     } &             \\
\bottomrule
\end{tabular}
\begin{tabular}{lcccccc}
                          & \textbf{coef} & \textbf{std err} & \textbf{t} & \textbf{P$> |$t$|$} & \textbf{[0.025} & \textbf{0.975]}  \\

  return ptp(axis=axis, out=out, **kwargs)


In [4]:
# Logit counterpart of the HD entry regression.
# The design is rebuilt here instead of reusing `x`/`y` left over from the previous
# cell: other cells in this notebook rebind `x`/`y` to different outcomes, so relying
# on the leftover names makes this cell fit the wrong model when run out of order.
x = entry2_data[['log income', 'log population', 'log hd warehouse', 'LOW entry']].copy()
y = entry2_data['HD entry']
model1 = sm.Logit(y, sm.add_constant(x)).fit()
# Emit the summary as LaTeX source so it can be pasted into a write-up.
print(model1.summary().as_latex())

Optimization terminated successfully.
         Current function value: 0.303376
         Iterations 7
\begin{center}
\begin{tabular}{lclc}
\toprule
\textbf{Dep. Variable:}   &     HD entry     & \textbf{  No. Observations:  } &     6987    \\
\textbf{Model:}           &      Logit       & \textbf{  Df Residuals:      } &     6982    \\
\textbf{Method:}          &       MLE        & \textbf{  Df Model:          } &        4    \\
\textbf{Date:}            & Wed, 16 Jun 2021 & \textbf{  Pseudo R-squ.:     } &   0.3289    \\
\textbf{Time:}            &     12:36:51     & \textbf{  Log-Likelihood:    } &   -2119.7   \\
\textbf{converged:}       &       True       & \textbf{  LL-Null:           } &   -3158.5   \\
\bottomrule
\end{tabular}
\begin{tabular}{lcccccc}
                          & \textbf{coef} & \textbf{std err} & \textbf{z} & \textbf{P$> |$z$|$} & \textbf{[0.025} & \textbf{0.975]}  \\
\midrule
\textbf{const}            &     -14.2117  &        1.232     &   -11.537  &         0.

In [5]:
# OLS for the HD store count: regress the raw HD column on income, population,
# HD warehouse distance (all in logs) and the raw LOW column.
hd_count_regressors = ['log income', 'log population', 'log hd warehouse', 'LOW']
x = entry2_data.loc[:, hd_count_regressors].copy()
y = entry2_data.loc[:, 'HD']
model1 = sm.OLS(endog=y, exog=sm.add_constant(x)).fit()
# Last expression of the cell: rendered as the rich summary table.
model1.summary()

0,1,2,3
Dep. Variable:,HD,R-squared:,0.427
Model:,OLS,Adj. R-squared:,0.427
Method:,Least Squares,F-statistic:,1301.0
Date:,"Wed, 16 Jun 2021",Prob (F-statistic):,0.0
Time:,12:36:51,Log-Likelihood:,-5016.2
No. Observations:,6987,AIC:,10040.0
Df Residuals:,6982,BIC:,10080.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-1.0561,0.180,-5.876,0.000,-1.408,-0.704
log income,0.0104,0.017,0.621,0.534,-0.022,0.043
log population,0.1229,0.006,21.481,0.000,0.112,0.134
log hd warehouse,-0.0184,0.006,-3.119,0.002,-0.030,-0.007
LOW,0.6740,0.013,50.712,0.000,0.648,0.700

0,1,2,3
Omnibus:,9368.873,Durbin-Watson:,1.854
Prob(Omnibus):,0.0,Jarque-Bera (JB):,6369933.157
Skew:,7.163,Prob(JB):,0.0
Kurtosis:,150.225,Cond. No.,445.0


In [6]:
# Linear probability model for LOW entry: regress the 0/1 LOW entry indicator
# on income, population, LOW warehouse distance (all in logs) and the HD entry indicator.
# NOTE(review): the outcome is binary and .fit() uses the default covariance;
# consider heteroskedasticity-robust standard errors - confirm with the author.
low_entry_regressors = ['log income', 'log population', 'log low warehouse', 'HD entry']
x = entry2_data.loc[:, low_entry_regressors].copy()
y = entry2_data.loc[:, 'LOW entry']
model1 = sm.OLS(endog=y, exog=sm.add_constant(x)).fit()
# Emit the summary as LaTeX source so it can be pasted into a write-up.
print(model1.summary().as_latex())

\begin{center}
\begin{tabular}{lclc}
\toprule
\textbf{Dep. Variable:}    &    LOW entry     & \textbf{  R-squared:         } &     0.271   \\
\textbf{Model:}            &       OLS        & \textbf{  Adj. R-squared:    } &     0.271   \\
\textbf{Method:}           &  Least Squares   & \textbf{  F-statistic:       } &     649.9   \\
\textbf{Date:}             & Wed, 16 Jun 2021 & \textbf{  Prob (F-statistic):} &     0.00    \\
\textbf{Time:}             &     12:36:51     & \textbf{  Log-Likelihood:    } &   -1789.7   \\
\textbf{No. Observations:} &        6987      & \textbf{  AIC:               } &     3589.   \\
\textbf{Df Residuals:}     &        6982      & \textbf{  BIC:               } &     3624.   \\
\textbf{Df Model:}         &           4      & \textbf{                     } &             \\
\bottomrule
\end{tabular}
\begin{tabular}{lcccccc}
                           & \textbf{coef} & \textbf{std err} & \textbf{t} & \textbf{P$> |$t$|$} & \textbf{[0.025} & \textbf{0.975]}  \

In [7]:
# Logit counterpart of the LOW entry regression.
# The design is rebuilt here instead of reusing `x`/`y` left over from the previous
# cell: other cells in this notebook rebind `x`/`y` to different outcomes, so relying
# on the leftover names makes this cell fit the wrong model when run out of order.
x = entry2_data[['log income', 'log population', 'log low warehouse', 'HD entry']].copy()
y = entry2_data['LOW entry']
model1 = sm.Logit(y, sm.add_constant(x)).fit()
# Emit the summary as LaTeX source so it can be pasted into a write-up.
print(model1.summary().as_latex())

Optimization terminated successfully.
         Current function value: 0.319914
         Iterations 7
\begin{center}
\begin{tabular}{lclc}
\toprule
\textbf{Dep. Variable:}    &    LOW entry     & \textbf{  No. Observations:  } &     6987    \\
\textbf{Model:}            &      Logit       & \textbf{  Df Residuals:      } &     6982    \\
\textbf{Method:}           &       MLE        & \textbf{  Df Model:          } &        4    \\
\textbf{Date:}             & Wed, 16 Jun 2021 & \textbf{  Pseudo R-squ.:     } &   0.2712    \\
\textbf{Time:}             &     12:38:30     & \textbf{  Log-Likelihood:    } &   -2235.2   \\
\textbf{converged:}        &       True       & \textbf{  LL-Null:           } &   -3067.1   \\
\bottomrule
\end{tabular}
\begin{tabular}{lcccccc}
                           & \textbf{coef} & \textbf{std err} & \textbf{z} & \textbf{P$> |$z$|$} & \textbf{[0.025} & \textbf{0.975]}  \\
\midrule
\textbf{const}             &       0.2564  &        1.267     &     0.202  &   

In [8]:
# OLS for the LOW store count: regress the raw LOW column on income, population,
# LOW warehouse distance (all in logs) and the raw HD column.
low_count_regressors = ['log income', 'log population', 'log low warehouse', 'HD']
x = entry2_data.loc[:, low_count_regressors].copy()
y = entry2_data.loc[:, 'LOW']
model1 = sm.OLS(endog=y, exog=sm.add_constant(x)).fit()
# Last expression of the cell: rendered as the rich summary table.
model1.summary()

0,1,2,3
Dep. Variable:,LOW,R-squared:,0.427
Model:,OLS,Adj. R-squared:,0.426
Method:,Least Squares,F-statistic:,1300.0
Date:,"Wed, 16 Jun 2021",Prob (F-statistic):,0.0
Time:,12:38:30,Log-Likelihood:,-3193.4
No. Observations:,6987,AIC:,6397.0
Df Residuals:,6982,BIC:,6431.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.2562,0.135,1.894,0.058,-0.009,0.521
log income,-0.0961,0.013,-7.623,0.000,-0.121,-0.071
log population,0.0962,0.004,21.835,0.000,0.088,0.105
log low warehouse,-0.0096,0.005,-1.929,0.054,-0.019,0.000
HD,0.3992,0.008,50.644,0.000,0.384,0.415

0,1,2,3
Omnibus:,3902.346,Durbin-Watson:,1.809
Prob(Omnibus):,0.0,Jarque-Bera (JB):,115003.322
Skew:,2.124,Prob(JB):,0.0
Kurtosis:,22.416,Cond. No.,443.0
