# Replication notebook for "Hard traveling: unemployment and road infrastructure in the shadow of political conflict" (Abrahams, 2021)

https://www.cambridge.org/core/journals/political-science-research-and-methods/article/abs/hard-traveling-unemployment-and-road-infrastructure-in-the-shadow-of-political-conflict/135F8A50F613DA3C9C4CB9335F0BFCF7#article

In [3]:
import os
import pandas as pd
from linearmodels import IV2SLS
from collections import OrderedDict
from linearmodels.iv.results import compare

In [4]:
# File name of the Stata dataset read by the loading cell (relative to the notebook).
my_data = "hard_traveling_dataset.dta"

In [5]:
# Read the Stata file named by `my_data` into the notebook-level DataFrame.
df = pd.read_stata(filepath_or_buffer=my_data)

## Rename columns to match variable names 

In [6]:
# Map the dataset's column names onto the names used in the rest of the notebook.
# Rebinding `df` (instead of `inplace=True`) leaves the frame returned by the
# loading cell untouched; labels missing from the frame are ignored by default,
# so re-running this cell is harmless.
df = df.rename(
    columns={
        'oe_bright_30': 'obstruction',
        'oe_lf_1_bright30': 'protection',
        'iv_bright_30': 'iv_obstruction',
        'iv_lf_1_bright30': 'iv_protection',
    }
)

## Normalize treatment and instrumental variables

In [7]:
# Divide each treatment and instrument column by its own mean.
for var in ('obstruction', 'protection', 'iv_obstruction', 'iv_protection'):
    column_mean = df[var].mean()
    df[var] = df[var].div(column_mean)

## Add dummy variables

In [8]:
# Column names of the dummy controls, generated from their naming patterns.
# Governorate dummies are numbered 0-10; the other numbered families run 1-10.
_family_ids = range(1, 11)

governorate_dummies = [f"g_{i}" for i in range(11)]
checkpoint_dummies = [f"checkpoint_{i}" for i in _family_ids]
partial_checkpoint_dummies = [f"partialcheckpoint_{i}" for i in _family_ids]
roadgate_dummies = [f"roadgate_{i}" for i in _family_ids]
greenlinecheckpoint_dummies = [f"greenlinecheckpoint_{i}" for i in _family_ids]
earthmound_dummies = [f"earthmound_{i}" for i in _family_ids]
# Settlement dummies are suffixed 1000, 2000, ..., 10000.
settle_dummies = [f"settle_in_{i}km" for i in range(1000, 11000, 1000)]

# Every dummy family, concatenated in the order defined above.
all_dummies = [
    *governorate_dummies,
    *checkpoint_dummies,
    *partial_checkpoint_dummies,
    *roadgate_dummies,
    *greenlinecheckpoint_dummies,
    *earthmound_dummies,
    *settle_dummies,
]

## OLS (endogenous regression)

In [9]:
def run_ols(dependent, exog, weight):
    """Fit a weighted regression without instruments on the notebook-level ``df``.

    The model is built with ``IV2SLS`` while passing ``None`` for both the
    endogenous regressors and the instruments.

    Parameters
    ----------
    dependent
        Label of the ``df`` column used as the outcome.
    exog
        Labels of the ``df`` columns used as regressors.
    weight
        Label of the ``df`` column holding the observation weights.

    Returns
    -------
    The results object produced by ``IV2SLS(...).fit()`` with default
    fit settings.
    """
    outcome = df[dependent]
    regressors = df[exog]
    obs_weights = df[weight]
    # No endogenous block and no instruments are supplied to the estimator.
    estimator = IV2SLS(outcome, regressors, endog=None, instruments=None, weights=obs_weights)
    return estimator.fit()

# Regress the employment change on the (uninstrumented) treatments plus all
# dummy controls, weighting observations by `lf_1_2007`.
result_ols = run_ols(
    dependent='chng_employment',
    exog=['obstruction', 'protection', *all_dummies],
    weight='lf_1_2007',
)

## 2SLS regression 

In [10]:
def run_twosls(dependent, exog, endog, instrument, weight):
    """Fit a weighted two-stage least squares model on the notebook-level ``df``.

    Parameters
    ----------
    dependent
        Label of the ``df`` column used as the outcome.
    exog
        Labels of the ``df`` columns used as exogenous controls.
    endog
        Labels of the ``df`` columns treated as endogenous regressors.
    instrument
        Labels of the ``df`` columns used as instruments.
    weight
        Label of the ``df`` column holding the observation weights.

    Returns
    -------
    The results object produced by ``IV2SLS(...).fit()`` with default
    fit settings.
    """
    model = IV2SLS(
        dependent=df[dependent],
        exog=df[exog],
        endog=df[endog],
        instruments=df[instrument],
        weights=df[weight],
    )
    return model.fit()

# Instrument the two treatments with their `iv_` counterparts, keeping the
# dummy controls as exogenous regressors and weighting by `lf_1_2007`.
result_twosls = run_twosls(
    dependent='chng_employment',
    exog=all_dummies,
    endog=['obstruction', 'protection'],
    instrument=['iv_obstruction', 'iv_protection'],
    weight='lf_1_2007',
)

## Clustered 2SLS regression

In [14]:
def run_twosls_cluster(dependent, exog, endog, instrument, weight, cluster):
    """Fit a weighted 2SLS model on ``df`` with a clustered covariance estimator.

    Builds the same ``IV2SLS`` model as an unclustered fit would; only the
    arguments passed to ``fit`` differ.

    Parameters
    ----------
    dependent
        Label of the ``df`` column used as the outcome.
    exog
        Labels of the ``df`` columns used as exogenous controls.
    endog
        Labels of the ``df`` columns treated as endogenous regressors.
    instrument
        Labels of the ``df`` columns used as instruments.
    weight
        Label of the ``df`` column holding the observation weights.
    cluster
        Label of the ``df`` column passed to ``fit`` as ``clusters``.

    Returns
    -------
    The results object produced by ``fit(cov_type="clustered", ...)``.
    """
    outcome, controls = df[dependent], df[exog]
    endogenous, instruments = df[endog], df[instrument]
    obs_weights = df[weight]
    cluster_ids = df[cluster]

    estimator = IV2SLS(outcome, controls, endogenous, instruments=instruments, weights=obs_weights)
    return estimator.fit(cov_type="clustered", clusters=cluster_ids)

# Same specification as the plain 2SLS fit, with the covariance clustered on
# the `pcbs_pov_cluster` column.
result_twosls_cluster = run_twosls_cluster(
    dependent='chng_employment',
    exog=all_dummies,
    endog=['obstruction', 'protection'],
    instrument=['iv_obstruction', 'iv_protection'],
    weight='lf_1_2007',
    cluster='pcbs_pov_cluster',
)
        

## Results table comparing OLS to 2SLS to clustered 2SLS

In [16]:
# Collect the three fits under their column labels, in display order, and
# print the side-by-side comparison table.
res = OrderedDict(
    [
        ('OLS', result_ols),
        ('2SLS', result_twosls),
        ('clustered_2SLS', result_twosls_cluster),
    ]
)
print(compare(res))

                                   Model Comparison                                   
                                           OLS                2SLS      clustered_2SLS
--------------------------------------------------------------------------------------
Dep. Variable                  chng_employment     chng_employment     chng_employment
Estimator                                  OLS             IV-2SLS             IV-2SLS
No. Observations                           480                 480                 480
Cov. Est.                               robust              robust           clustered
R-squared                               0.3474              0.3355              0.3355
Adj. R-squared                          0.2319              0.2180              0.2180
F-statistic                             4548.7              6810.3              6840.4
P-value (F-stat)                        0.0000              0.0000              0.0000
obstruction                            -2.5