<a href="https://colab.research.google.com/github/hatchdavid1/tensorflow_notes_tests/blob/main/LatentVars_w_DL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install econml[all]
from google.colab import files 
files.upload()

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import econml
from econml.iv.nnet import DeepIV
import keras

Using TensorFlow backend.


In [10]:
# Read the Stata file from the working directory into a DataFrame and preview
# its first five rows.
df = pd.read_stata(filepath_or_buffer='hard_traveling_dataset.dta')
df.head(n=5)

Unnamed: 0,loc,pcbs_pov_cluster,checkpoint_1,checkpoint_2,checkpoint_3,checkpoint_4,checkpoint_5,checkpoint_6,checkpoint_7,checkpoint_8,...,settle_in_1000km,settle_in_2000km,settle_in_3000km,settle_in_4000km,settle_in_5000km,settle_in_6000km,settle_in_7000km,settle_in_8000km,settle_in_9000km,settle_in_10000km
0,10005,10005.0,9,0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,10010,10010.0,0,4,5,0,0,0,0,5,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,10015,10010.0,0,0,9,0,0,0,0,6,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,10020,10010.0,0,0,0,9,0,5,0,0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,10025,10055.0,0,0,0,0,4,8,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [12]:
# Give the two treatment columns and their two instruments readable names.
# The result is re-assigned rather than mutated with inplace=True.
df = df.rename(columns={'oe_bright_30': 'obstruction',
                        'oe_lf_1_bright30': 'protection',
                        'iv_bright_30': 'iv_obstruction',
                        'iv_lf_1_bright30': 'iv_protection'})

# Normalize each of the four columns by its own mean (one vectorised division;
# the column means are aligned to the columns by name).
# mean() skips NaN by default, so missing values are still NaN after this cell;
# NaN and inf are replaced by 0 in the following cell.
scaled_cols = ['obstruction', 'protection', 'iv_obstruction', 'iv_protection']
df[scaled_cols] = df[scaled_cols] / df[scaled_cols].mean()

In [13]:
# Replace non-finite entries (+inf, -inf, NaN) with 0 across the whole frame.
# -inf is covered as well as +inf: the mean-normalisation in the previous cell
# divides by a column mean, and a zero mean can yield either sign of infinity.
# The cleaned frame is re-assigned instead of being mutated with inplace=True.
df = (
    df.replace(np.inf, 0)
      .replace(-np.inf, 0)
      .replace(np.nan, 0)
)

In [16]:
# Helper for loading the 71 dummy-variable controls: builds the column names
# of one family of dummies.
def loading_vars(variable, n_levels=11):
  """Return the dummy-column names ``<variable>_0`` ... ``<variable>_<n_levels - 1>``.

  Args:
    variable: Prefix shared by the family of columns, e.g. ``'checkpoint'``.
    n_levels: Number of consecutive suffixes to generate, starting at 0.
      Defaults to 11, which yields the suffixes 0 through 10.

  Returns:
    A list of ``n_levels`` strings of the form ``f'{variable}_{i}'``.
  """
  return [f'{variable}_{i}' for i in range(n_levels)]

In [23]:
# One list of column names per family of dummy controls, each produced by
# loading_vars() from the family's column prefix.
(governorate_dummies,
 checkpoint_dummies,
 partial_checkpoint_dummies,
 roadgate_dummies,
 greenlinecheckpoint_dummies,
 earthmound_dummies) = (
    loading_vars(prefix)
    for prefix in ('g', 'checkpoint', 'partialcheckpoint', 'roadgate',
                   'greenlinecheckpoint', 'earthmound')
)

# Settlement dummies are named by distance band: settle_in_1000km ... settle_in_10000km.
settle_dummies = [f"settle_in_{i}km" for i in range(1000, 10001, 1000)]

# Every candidate control column, in the same family order as above.
all_dummies = [
    *governorate_dummies,
    *checkpoint_dummies,
    *partial_checkpoint_dummies,
    *roadgate_dummies,
    *greenlinecheckpoint_dummies,
    *earthmound_dummies,
    *settle_dummies,
]

In [24]:
# Divide the dataset in half at the median of population_total (1885, as stated
# by the original analysis).
# Separate variables (outcome, treatment, covariates, instruments) are then
# defined for the two subgroups and converted into NumPy arrays to fit the NN.
# The bounds `< 1885` / `>= 1885` are exhaustive; the earlier `<= 1884` bound
# left any non-integer value between 1884 and 1885 out of both subgroups.
POPULATION_MEDIAN = 1885
df_per = df[df['population_total'] < POPULATION_MEDIAN]
df_not_per = df[df['population_total'] >= POPULATION_MEDIAN]

In [36]:
# NumPy inputs for the peripheral-neighbourhood subsample (df_per):
#   y - outcome, t - treatments, x - covariates, z - instruments.
# The *_0 dummy of five control families is excluded from the covariates.
not_in = {
    'checkpoint_0',
    'partialcheckpoint_0',
    'roadgate_0',
    'greenlinecheckpoint_0',
    'earthmound_0',
}
all_dummies_1 = [column for column in all_dummies if column not in not_in]

y = df_per['chng_employment'].to_numpy()
t = df_per.loc[:, ['obstruction', 'protection']].to_numpy()
x = df_per.loc[:, all_dummies_1].to_numpy()
z = df_per.loc[:, ['iv_obstruction', 'iv_protection']].to_numpy()

In [32]:
# Display the complete list of candidate control column names for inspection.
all_dummies

['g_0',
 'g_1',
 'g_2',
 'g_3',
 'g_4',
 'g_5',
 'g_6',
 'g_7',
 'g_8',
 'g_9',
 'g_10',
 'checkpoint_0',
 'checkpoint_1',
 'checkpoint_2',
 'checkpoint_3',
 'checkpoint_4',
 'checkpoint_5',
 'checkpoint_6',
 'checkpoint_7',
 'checkpoint_8',
 'checkpoint_9',
 'checkpoint_10',
 'partialcheckpoint_0',
 'partialcheckpoint_1',
 'partialcheckpoint_2',
 'partialcheckpoint_3',
 'partialcheckpoint_4',
 'partialcheckpoint_5',
 'partialcheckpoint_6',
 'partialcheckpoint_7',
 'partialcheckpoint_8',
 'partialcheckpoint_9',
 'partialcheckpoint_10',
 'roadgate_0',
 'roadgate_1',
 'roadgate_2',
 'roadgate_3',
 'roadgate_4',
 'roadgate_5',
 'roadgate_6',
 'roadgate_7',
 'roadgate_8',
 'roadgate_9',
 'roadgate_10',
 'greenlinecheckpoint_0',
 'greenlinecheckpoint_1',
 'greenlinecheckpoint_2',
 'greenlinecheckpoint_3',
 'greenlinecheckpoint_4',
 'greenlinecheckpoint_5',
 'greenlinecheckpoint_6',
 'greenlinecheckpoint_7',
 'greenlinecheckpoint_8',
 'greenlinecheckpoint_9',
 'greenlinecheckpoint_10',
 'ear