In [None]:
# Install plotly into the kernel's own environment (%pip rather than !pip) and pin
# the version so a fresh run reproduces the same figures.
%pip install -U plotly==4.14.1

Collecting plotly
[?25l  Downloading https://files.pythonhosted.org/packages/c9/09/315462259ab7b60a3d4b7159233ed700733c87d889755bdc00a9fb46d692/plotly-4.14.1-py2.py3-none-any.whl (13.2MB)
[K     |████████████████████████████████| 13.2MB 9.2MB/s 
Installing collected packages: plotly
  Found existing installation: plotly 4.4.1
    Uninstalling plotly-4.4.1:
      Successfully uninstalled plotly-4.4.1
Successfully installed plotly-4.14.1


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
from sklearn.cluster import AgglomerativeClustering
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from scipy.stats import sem
import json

  import pandas.util.testing as tm


In [None]:
def _load_geojson(path):
    """Parse one GeoJSON file and return the resulting Python object.

    The file is read as UTF-8 explicitly instead of relying on the platform
    default encoding: the municipality names used later as lookup keys
    (e.g. 'Torreón') contain non-ASCII characters.
    """
    with open(path, encoding='utf-8') as f:
        return json.load(f)


# One GeoJSON file per municipality code
geojson_05035 = _load_geojson('05035.geojson')
geojson_10007 = _load_geojson('10007.geojson')
geojson_10012 = _load_geojson('10012.geojson')

In [None]:
# Merge the first feature of the two other municipalities into geojson_05035 so a
# single GeoJSON object carries all three shapes.
# The membership check keeps the cell idempotent: re-running it does not append
# the same feature a second time.
for extra_geojson in (geojson_10007, geojson_10012):
    extra_feature = extra_geojson['features'][0]
    if extra_feature not in geojson_05035['features']:
        geojson_05035['features'].append(extra_feature)

In [None]:
# Data: percentage share of rooms with cement floors per municipality,
# one column per dropdown entry.
df_plot = pd.DataFrame({
    'NOM_MUN': ['Torreón', 'Gómez Palacio', 'Lerdo'],
    'After Pisofirme': [92.97, 72.24, 76.31],
    'Before Pisofirme': [33.02, 32.54, 33.64],
})

cols_dd = ["After Pisofirme", "Before Pisofirme"]
for col in cols_dd:
    df_plot[col] = pd.to_numeric(df_plot[col], downcast="float")

# Position (in cols_dd) of the dropdown entry that is selected when the figure loads
initial_index = 1

# Comparing this array with one column name yields the per-trace visibility mask
visible = np.array(cols_dd)

# Define one trace and one dropdown button per column
traces = []
buttons = []
for value in cols_dd:
    traces.append(go.Choropleth(geojson=geojson_05035, z=df_plot[value],
                                locations=df_plot.NOM_MUN,
                                featureidkey='properties.NOM_MUN',
                                colorscale="Viridis",
                                zmin=30, zmax=93,
                                # Only the initially selected trace starts visible, so the
                                # first render matches the active dropdown entry instead of
                                # drawing both traces (and both colorbars) on top of each other
                                visible=(value == cols_dd[initial_index]),
                                colorbar_title="% Share of rooms<br>with cement floors"))

    # Selecting a button shows only its own trace and sets the title to its label
    buttons.append(dict(label=value,
                        method="update",
                        args=[{"visible": (visible == value).tolist()},
                              {"title": f"{value}"}]))

updatemenus = [{"active": initial_index,
                "buttons": buttons,
               }]

# Build the figure
fig = go.Figure(data=traces,
                layout=dict(updatemenus=updatemenus))
# Zoom the map to the plotted municipalities and hide the base map
fig.update_geos(fitbounds="locations", visible=False)
# Title shown until a dropdown entry is selected
fig.update_layout(title="Percentage share of cemented rooms",
                  title_x=0.5, autosize=True, margin=dict(r=100, l=100, t=50, b=20))

In [None]:
# Export the choropleth figure (the current `fig`) to an HTML file
fig.write_html("map_with_button.html")

In [None]:
def _as_categorical(names):
    """Wrap each column name in ``C(...)`` so the formula treats it as categorical."""
    return ['C(' + name + ')' for name in names]


# Data path constant
DATA_PATH = "PisoFirme_AEJPol-20070024_household.dta"

# Dependent variables (dataset name)
DEP_VARS = ['S_shcementfloor',
            'S_cementfloorkit',
            'S_cementfloordin',
            'S_cementfloorbat',
            'S_cementfloorbed']

# Continuous control variables (dataset name)
CONT_CTRL_VARS = ['S_rooms',
                  'S_HHpeople',
                  'S_headeduc',
                  'S_spouseeduc',
                  'S_headage',
                  'S_spouseage',
                  'S_washhands',
                  'S_cashtransfers']

# Missing-value (NaN) indicator names for the continuous control variables
CONT_CTRL_VARS_NAN = [name + '_nan' for name in CONT_CTRL_VARS]

# Demographic control variables (dataset name): S_dem1 ... S_dem8
DEMO_CTRL_VARS = ['S_dem' + str(number) for number in range(1, 9)]

# Categorical control variables (dataset name)
DUMMY_CTRL_VARS = ['S_hasanimals',
                   'S_animalsinside',
                   'S_waterland',
                   'S_waterhouse',
                   'S_electricity',
                   'S_garbage',
                   'S_milkprogram',
                   'S_foodprogram',
                   'S_seguropopular']

# Missing-value (NaN) indicator names for the categorical control variables
DUMMY_CTRL_VARS_NAN = [name + '_nan' for name in DUMMY_CTRL_VARS]

# All control variables that get a NaN indicator (demographic variables not included)
CTRL_VARS = CONT_CTRL_VARS + DUMMY_CTRL_VARS

# Variables for Model 1 linear regression (statsmodels name) --> single program dummy
MDL1_VARS = ['C(dpisofirme)']

# Variables for Model 2 linear regression (statsmodels name) --> add demographic and
# health control variables: the first 7 continuous and first 6 categorical controls,
# their NaN indicators, and the demographic variables
MDL2_VARS = (MDL1_VARS
             + CONT_CTRL_VARS[:7]
             + _as_categorical(DUMMY_CTRL_VARS[:6])
             + _as_categorical(CONT_CTRL_VARS_NAN[:7])
             + _as_categorical(DUMMY_CTRL_VARS_NAN[:6])
             + DEMO_CTRL_VARS)

# Variables for Model 3 linear regression (statsmodels name) --> add social program
# control variables: the last continuous and last 3 categorical controls plus their
# NaN indicators
MDL3_VARS = (MDL2_VARS
             + CONT_CTRL_VARS[-1:]
             + _as_categorical(DUMMY_CTRL_VARS[-3:])
             + _as_categorical(CONT_CTRL_VARS_NAN[-1:])
             + _as_categorical(DUMMY_CTRL_VARS_NAN[-3:]))

# Model variables without S_rooms (and its NaN indicator) for the discussion part
_ROOMS_TERMS = ('S_rooms', 'C(S_rooms_nan)')
MDL2_VARS_NOROOMS = [term for term in MDL2_VARS if term not in _ROOMS_TERMS]
MDL3_VARS_NOROOMS = [term for term in MDL3_VARS if term not in _ROOMS_TERMS]

# Names for table rows
ROWS = ['Share of rooms with cement floors',
        'Cement floor in kitchen',
        'Cement floor in dining room',
        'Cement floor in bathroom',
        'Cement floor in bedroom']

# Columns for the control group table
CG_COLUMNS = pd.MultiIndex.from_product([['Control Group'], ['Mean', 'Standard Deviation']])

# Program dummy name in statsmodels coefficients output
# (the constant keeps its original spelling because other cells may refer to it)
PROGRAMM_DUMMY = 'C(dpisofirme)[T.1.0]'

In [None]:
# Load dataset
household_raw = pd.read_stata(DATA_PATH)

# Drop households whose geographical information is incomplete (missing cluster id)
household_geo = household_raw[household_raw['idcluster'].notna()]

# One 0/1 column per control variable (except S_dem*) flagging where the value was
# missing. Testing isna() directly gives the same '<name>_nan' columns that
# get_dummies(dummy_na=True) produces, without first building a dummy column for
# every distinct value of the continuous controls.
missing_flags = household_geo[CTRL_VARS].isna().astype(int).add_suffix('_nan')
assert list(missing_flags.columns) == CONT_CTRL_VARS_NAN + DUMMY_CTRL_VARS_NAN

# Append the indicators, then impute all remaining NaN values with 0
data = pd.concat([household_geo, missing_flags], axis=1).fillna(0)

# Preview only the first rows instead of rendering the whole frame
data.head()

Unnamed: 0,dpisofirme,idcluster,coord_x,coord_y,idmun,idmza,C_blocksdirtfloor,C_HHdirtfloor,C_child05,C_households,C_people,C_rooms,C_HHpersons,C_waterland,C_waterhouse,C_waterbath,C_gasheater,C_refrigerator,C_washing,C_telephone,C_vehicle,C_overcrowding,C_poverty,C_illiterate,C_headeduc,C_dropouts515,C_employment,C_earnincome,S_HHpeople,S_headage,S_spouseage,S_headeduc,S_spouseeduc,S_rooms,S_waterland,S_waterhouse,S_electricity,S_cementfloor2000,S_hasanimals,S_animalsinside,...,S_dem5,S_dem6,S_dem7,S_dem8,S_seguropopular,S_shcementfloor,S_cementfloorkit,S_cementfloordin,S_cementfloorbat,S_cementfloorbed,S_satisfloor,S_satishouse,S_satislife,S_cesds,S_pss,S_instcement,S_instsanita,S_restsanita,S_constceili,S_restowalls,S_improveany,S_logrent,S_logsell,S_rooms_nan,S_HHpeople_nan,S_headeduc_nan,S_spouseeduc_nan,S_headage_nan,S_spouseage_nan,S_washhands_nan,S_cashtransfers_nan,S_hasanimals_nan,S_animalsinside_nan,S_waterland_nan,S_waterhouse_nan,S_electricity_nan,S_garbage_nan,S_milkprogram_nan,S_foodprogram_nan,S_seguropopular_nan
0,0.0,70000537.0,-103.503670,25.583067,7.0,40,0.300000,0.036629,0.555554,819.0,3530.0,3.097682,4.310134,0.002443,0.151522,0.272279,0.004885,0.114775,0.247868,0.524304,0.644129,1.731482,0.062267,0.045177,7.578925,0.092800,1.710631,1.610496,3.0,44.0,43.0,6.0,6.0,3,1,1,1,0.40,1.0,0.0,...,0.333333,0.00,0.333333,0.000000,0.0,0.6,1.0,0.0,1.0,0.0,1.0,1.0,1.0,14.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,5.298317,9.903487,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0.0,70000537.0,-103.503670,25.583067,7.0,40,0.300000,0.036629,0.555554,819.0,3530.0,3.097682,4.310134,0.002443,0.151522,0.272279,0.004885,0.114775,0.247868,0.524304,0.644129,1.731482,0.062267,0.045177,7.578925,0.092800,1.710631,1.610496,2.0,37.0,0.0,6.0,0.0,1,1,1,1,0.75,0.0,0.0,...,0.500000,0.00,0.500000,0.000000,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,17.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,5.298317,9.615806,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0.0,70000537.0,-103.503670,25.583067,7.0,40,0.300000,0.036629,0.555554,819.0,3530.0,3.097682,4.310134,0.002443,0.151522,0.272279,0.004885,0.114775,0.247868,0.524304,0.644129,1.731482,0.062267,0.045177,7.578925,0.092800,1.710631,1.610496,2.0,18.0,0.0,12.0,0.0,4,1,1,1,1.00,0.0,0.0,...,0.500000,0.00,0.500000,0.000000,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,16.0,16.0,0.0,0.0,0.0,0.0,0.0,0.0,6.214608,10.819778,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0.0,70000537.0,-103.503670,25.583067,7.0,47,0.300000,0.036629,0.555554,819.0,3530.0,3.097682,4.310134,0.002443,0.151522,0.272279,0.004885,0.114775,0.247868,0.524304,0.644129,1.731482,0.062267,0.045177,7.578925,0.092800,1.710631,1.610496,4.0,43.0,30.0,9.0,9.0,3,1,1,1,1.00,0.0,0.0,...,0.000000,0.00,0.250000,0.000000,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,20.0,19.0,0.0,0.0,0.0,0.0,0.0,0.0,11.385092,11.918390,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0.0,70000537.0,-103.503670,25.583067,7.0,47,0.300000,0.036629,0.555554,819.0,3530.0,3.097682,4.310134,0.002443,0.151522,0.272279,0.004885,0.114775,0.247868,0.524304,0.644129,1.731482,0.062267,0.045177,7.578925,0.092800,1.710631,1.610496,5.0,46.0,45.0,3.0,6.0,3,1,1,1,1.00,1.0,0.0,...,0.200000,0.00,0.400000,0.000000,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,5.703783,10.819778,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2750,1.0,353150000.0,-103.399841,25.501871,35.0,40,0.538462,0.100774,0.759924,454.0,1866.0,3.264429,4.110127,0.011336,0.262172,0.333881,0.022024,0.113730,0.286599,0.660113,0.534601,1.655615,0.070493,0.035692,9.006986,0.116742,1.473560,1.418500,4.0,25.0,24.0,6.0,4.0,1,1,0,1,0.00,0.0,0.0,...,0.000000,0.25,0.250000,0.000000,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,19.0,20.0,1.0,0.0,0.0,1.0,0.0,1.0,5.298317,9.615806,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2751,1.0,353150000.0,-103.399841,25.501871,35.0,40,0.538462,0.100774,0.759924,454.0,1866.0,3.264429,4.110127,0.011336,0.262172,0.333881,0.022024,0.113730,0.286599,0.660113,0.534601,1.655615,0.070493,0.035692,9.006986,0.116742,1.473560,1.418500,6.0,66.0,0.0,2.0,0.0,3,1,1,1,0.50,0.0,0.0,...,0.000000,0.00,0.166667,0.166667,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,9.0,11.0,1.0,0.0,0.0,0.0,0.0,0.0,5.991465,10.819778,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2752,1.0,353150000.0,-103.399841,25.501871,35.0,35,0.538462,0.100774,0.759924,454.0,1866.0,3.264429,4.110127,0.011336,0.262172,0.333881,0.022024,0.113730,0.286599,0.660113,0.534601,1.655615,0.070493,0.035692,9.006986,0.116742,1.473560,1.418500,5.0,35.0,32.0,9.0,9.0,2,1,1,1,0.80,1.0,0.0,...,0.200000,0.20,0.200000,0.000000,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,12.0,19.0,1.0,0.0,0.0,0.0,0.0,0.0,5.991465,9.210340,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2753,1.0,353150000.0,-103.399841,25.501871,35.0,34,0.538462,0.100774,0.759924,454.0,1866.0,3.264429,4.110127,0.011336,0.262172,0.333881,0.022024,0.113730,0.286599,0.660113,0.534601,1.655615,0.070493,0.035692,9.006986,0.116742,1.473560,1.418500,5.0,35.0,33.0,12.0,6.0,2,1,1,1,0.25,1.0,0.0,...,0.000000,0.20,0.200000,0.000000,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,6.0,10.0,1.0,0.0,0.0,0.0,0.0,0.0,6.396930,11.918390,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [None]:
# Split the households by the program dummy: 1 -> treatment, 0 -> control.
# Each subset gets a fresh 0..n-1 index.
is_treated = data['dpisofirme'] == 1
is_control = data['dpisofirme'] == 0

data_treatment = data.loc[is_treated].reset_index(drop=True)
data_control = data.loc[is_control].reset_index(drop=True)

In [None]:
# Columns plotted as "proportion" box plots, in display order
proportions = ['C_gasheater',
               'C_waterland',
               'C_illiterate',
               'C_HHdirtfloor',
               'C_poverty',
               'C_refrigerator',
               'C_waterhouse',
               'C_washing',
               'C_waterbath',
               'C_vehicle',
               'C_telephone']

# Reshape each group to long format (one row per household and variable) and tag it
# with its group label. melt() stacks the columns in the order given above, which is
# the row order the former append loop produced, without DataFrame.append (removed in
# pandas 2.0) and without the stray, always-empty 'treatment' column.
stacked_by_group = {}
for group_label, households in (('Durango', data_control), ('Coahuila', data_treatment)):
    stacked = households[proportions].melt(var_name='variable', value_name='proportion')
    stacked['Group'] = group_label
    stacked_by_group[group_label] = stacked[['proportion', 'variable', 'Group']]

df_boxplots_treatment = stacked_by_group['Coahuila']
df_boxplots_control = stacked_by_group['Durango']

# Control rows first, then treatment rows
df_boxplots = pd.concat([df_boxplots_control, df_boxplots_treatment], ignore_index=True)

In [None]:
# Human-readable tick labels, one per plotted variable and in the same order
proportion_labels = ['No gas heater',
                     'No water connection outside',
                     'Illiterate members',
                     'Dirt floors',
                     'Below the poverty line',
                     'No refrigerator',
                     'No water connection inside',
                     'No washing machine',
                     'No water connection in bathroom',
                     'No vehicle',
                     'No telephone']

# Tick positions are derived from the labels so both lists always have the same
# length (the hand-written position list had one entry more than there were labels)
xaxis = go.layout.XAxis(tickvals=list(range(len(proportion_labels))),
                        ticktext=proportion_labels)

fig = px.box(df_boxplots, x="variable", y="proportion", color="Group")
fig.update_traces(quartilemethod="exclusive") # or "inclusive", or "linear" by default
fig.update_layout(title='State pre-treatment household proportions distribution',
                  yaxis_title='Proportion', autosize=False, width=1000, height=600,
                  xaxis=xaxis, legend_title_text='State')
fig.show()

In [None]:
# Export the proportions box plot (the current `fig`) to an HTML file
fig.write_html("pre_treatment_proportions.html")

In [None]:
# Columns plotted as "average number per household" box plots, in display order
numbers = ['C_rooms',
           'C_HHpersons',
           'C_overcrowding',
           'C_headeduc',
           'C_employment',
           'C_earnincome']

# Tick labels, one per entry of `numbers` and in the same order. The earlier list
# carried an extra 'Dirt floors' entry that does not correspond to any plotted
# column and shifted the last three labels onto the wrong variables.
number_labels = ['Rooms',
                 'People',
                 'Overcrowding',
                 'Household head schooling years',
                 'Family members who work',
                 'Family members with income']
assert len(number_labels) == len(numbers)

# Reshape each group to long format (one row per household and variable) and tag it
# with its group label; built without DataFrame.append (removed in pandas 2.0).
stacked_by_group = {}
for group_label, households in (('Durango', data_control), ('Coahuila', data_treatment)):
    stacked = households[numbers].melt(var_name='variable', value_name='number')
    stacked['Group'] = group_label
    stacked_by_group[group_label] = stacked[['number', 'variable', 'Group']]

df_boxplots_treatment = stacked_by_group['Coahuila']
df_boxplots_control = stacked_by_group['Durango']

# Control rows first, then treatment rows
df_boxplots = pd.concat([df_boxplots_control, df_boxplots_treatment], ignore_index=True)

xaxis = go.layout.XAxis(tickvals=list(range(len(number_labels))), ticktext=number_labels)

fig = px.box(df_boxplots, x="variable", y="number", color="Group")
fig.update_traces(quartilemethod="exclusive") # or "inclusive", or "linear" by default
fig.update_layout(title='Pre-treatment average numbers per household', yaxis_title='Average Number per Household',
                  autosize=False, width=800, height=700, xaxis=xaxis, legend_title_text='State')
fig.show()

C_rooms
C_HHpersons
C_overcrowding
C_headeduc
C_employment
C_earnincome


In [None]:
# Export the per-household numbers box plot (the current `fig`) to an HTML file
fig.write_html("pre_treatment_average.html")

In [None]:
# Row labels for the regression table: the first entry labels the sub-header row,
# the remaining five name the dependent variables.
cement_row_labels = ['variable',
                     'Share of rooms with cement floors',
                     'Cement floor in kitchen',
                     'Cement floor in dining room',
                     'Cement floor in bathroom',
                     'Cement floor in bedroom']

# Read the saved regression table and attach the labels as an extra column
cement_regression = pd.read_csv('cement_regression.csv').assign(
    dependent_variable=cement_row_labels)

# NOTE(review): row 0 holds sub-header text, so the columns presumably load as
# strings and round(3) would leave them unchanged -- confirm.
cement_regression.round(3)

Unnamed: 0,Control Group,Control Group.1,Model 1,Model 1.1,Model 1.2,Model 2,Model 2.1,Model 2.2,Model 3,Model 3.1,Model 3.2,Model 2 (no S_rooms),Model 2 (no S_rooms).1,Model 2 (no S_rooms).2,Model 3 (no S_rooms),Model 3 (no S_rooms).1,Model 3 (no S_rooms).2,dependent_variable
0,Mean,Standard Deviation,Coef.,St. Err.,Ratio,Coef.,St. Err.,Ratio,Coef.,St. Err.,Ratio,Coef.,St. Err.,Ratio,Coef.,St. Err.,Ratio,variable
1,0.7277989,0.36289524205852725,0.20193507733430965,[0.021]***,27.746000044833913,0.2029972845502722,[0.019]***,27.891947949728745,0.20573052260690652,[0.019]***,28.267496488610718,0.20751334011783876,[0.019]***,28.512456677752915,0.21015805085055658,[0.019]***,28.87584150953763,Share of rooms with cement floors
2,0.6712132,0.4699410345110903,0.2546311376475512,[0.025]***,37.93595447686008,0.25468234553881974,[0.022]***,37.943583631133855,0.25970374611066055,[0.022]***,38.69169175829897,0.2598136123039928,[0.023]***,38.70806005852849,0.26472848744468175,[0.023]***,39.44029760543331,Cement floor in kitchen
3,0.7085427,0.45459684497066866,0.20995948907534356,[0.026]***,29.632580748840567,0.21173213375129676,[0.024]***,29.88276251833581,0.21608583325864508,[0.024]***,30.497220825392322,0.21672909626372264,[0.025]***,30.588007591090072,0.2209884159133728,[0.025]***,31.189145620189056,Cement floor in dining room
4,0.80258435,0.39819158043104835,0.10490463646111074,[0.022]***,13.07085497578546,0.10825236583200253,[0.018]***,13.487973671214384,0.11225828735566802,[0.018]***,13.987101460476776,0.11270636852710583,[0.018]***,14.04293125224594,0.11666187586630747,[0.018]***,14.535777560383389,Cement floor in bathroom
5,0.6676238,0.471234191241231,0.2376625101593653,[0.02]***,35.59826713486129,0.24293343028866562,[0.02]***,36.387771641413586,0.24272374987163545,[0.02]***,36.35636467069118,0.2452496570127732,[0.021]***,36.7347075448272,0.24498156606503443,[0.02]***,36.694551555699405,Cement floor in bedroom


In [None]:
# The standard-error cells are stored as text such as '[0.021]***'. Parse the number
# out of each cell instead of retyping the values by hand, so the columns cannot
# drift from the CSV. Cells that are not numeric after stripping the brackets and
# stars (the 'St. Err.' sub-header) are kept as they are; cells that are already
# numbers pass through unchanged, so re-running this cell is harmless.
for se_column in ('Model 1.1', 'Model 2 (no S_rooms).1', 'Model 3 (no S_rooms).1'):
    raw_cells = cement_regression[se_column]
    parsed_cells = pd.to_numeric(raw_cells.astype(str).str.strip('[]*'), errors='coerce')
    cement_regression[se_column] = raw_cells.where(parsed_cells.isna(), parsed_cells)

cement_regression

Unnamed: 0,Control Group,Control Group.1,Model 1,Model 1.1,Model 1.2,Model 2,Model 2.1,Model 2.2,Model 3,Model 3.1,Model 3.2,Model 2 (no S_rooms),Model 2 (no S_rooms).1,Model 2 (no S_rooms).2,Model 3 (no S_rooms),Model 3 (no S_rooms).1,Model 3 (no S_rooms).2,dependent_variable
0,Mean,Standard Deviation,Coef.,St. Err.,Ratio,Coef.,St. Err.,Ratio,Coef.,St. Err.,Ratio,Coef.,St. Err.,Ratio,Coef.,St. Err.,Ratio,variable
1,0.7277989,0.36289524205852725,0.20193507733430965,0.021,27.746000044833913,0.2029972845502722,[0.019]***,27.891947949728745,0.20573052260690652,[0.019]***,28.267496488610718,0.20751334011783876,0.019,28.512456677752915,0.21015805085055658,0.019,28.87584150953763,Share of rooms with cement floors
2,0.6712132,0.4699410345110903,0.2546311376475512,0.025,37.93595447686008,0.25468234553881974,[0.022]***,37.943583631133855,0.25970374611066055,[0.022]***,38.69169175829897,0.2598136123039928,0.023,38.70806005852849,0.26472848744468175,0.023,39.44029760543331,Cement floor in kitchen
3,0.7085427,0.45459684497066866,0.20995948907534356,0.026,29.632580748840567,0.21173213375129676,[0.024]***,29.88276251833581,0.21608583325864508,[0.024]***,30.497220825392322,0.21672909626372264,0.025,30.588007591090072,0.2209884159133728,0.025,31.189145620189056,Cement floor in dining room
4,0.80258435,0.39819158043104835,0.10490463646111074,0.022,13.07085497578546,0.10825236583200253,[0.018]***,13.487973671214384,0.11225828735566802,[0.018]***,13.987101460476776,0.11270636852710583,0.018,14.04293125224594,0.11666187586630747,0.018,14.535777560383389,Cement floor in bathroom
5,0.6676238,0.471234191241231,0.2376625101593653,0.02,35.59826713486129,0.24293343028866562,[0.02]***,36.387771641413586,0.24272374987163545,[0.02]***,36.35636467069118,0.2452496570127732,0.021,36.7347075448272,0.24498156606503443,0.02,36.694551555699405,Cement floor in bedroom


In [None]:
bar_columns = ['Dependent variable', 'Model', 'Regression coefficient on program dummy', 'Standard Error']

# (label shown in the chart, coefficient column, standard-error column).
# The bars labelled 'Model 2' and 'Model 3' read the '(no S_rooms)' specifications.
model_columns = [('Model 1', 'Model 1', 'Model 1.1'),
                 ('Model 2', 'Model 2 (no S_rooms)', 'Model 2 (no S_rooms).1'),
                 ('Model 3', 'Model 3 (no S_rooms)', 'Model 3 (no S_rooms).1')]

# One record per (dependent variable, model). Table rows 1-5 hold the dependent
# variables; row 0 is the sub-header. All records are collected first and the frame
# is built once, which replaces the DataFrame.append loop (append was removed in
# pandas 2.0).
bar_records = [
    [cement_regression.loc[table_row, 'dependent_variable'],
     model_label,
     cement_regression.loc[table_row, coef_column],
     cement_regression.loc[table_row, se_column]]
    for table_row in range(1, 6)
    for model_label, coef_column, se_column in model_columns
]

cement_regression_bar = pd.DataFrame(bar_records, columns=bar_columns)

In [None]:
# Convert both value columns to numbers, downcast to the smallest float dtype
for value_column in ('Regression coefficient on program dummy', 'Standard Error'):
    cement_regression_bar[value_column] = pd.to_numeric(
        cement_regression_bar[value_column], downcast="float")

cement_regression_bar

Unnamed: 0,Dependent variable,Model,Regression coefficient on program dummy,Standard Error
0,Share of rooms with cement floors,Model 1,0.201935,0.021
1,Share of rooms with cement floors,Model 2,0.207513,0.019
2,Share of rooms with cement floors,Model 3,0.210158,0.019
3,Cement floor in kitchen,Model 1,0.254631,0.025
4,Cement floor in kitchen,Model 2,0.259814,0.023
5,Cement floor in kitchen,Model 3,0.264728,0.023
6,Cement floor in dining room,Model 1,0.209959,0.026
7,Cement floor in dining room,Model 2,0.216729,0.025
8,Cement floor in dining room,Model 3,0.220988,0.025
9,Cement floor in bathroom,Model 1,0.104905,0.022


In [None]:
fig = px.bar(cement_regression_bar, x="Dependent variable", y="Regression coefficient on program dummy", color="Model", barmode='group', error_y='Standard Error', title="Regressions of Cement Floor Coverage Measures on Program Dummy")

# Height of the '***' markers for each dependent variable, in x-axis order
star_heights = [0.240, 0.3, 0.255, 0.15, 0.28]
# Horizontal offsets of the three model bars around each category position
bar_offsets = (-0.26, 0, 0.26)

# One marker above every bar; round() keeps the coordinates at exactly two decimals
# (e.g. 0.74 rather than a floating-point neighbour of 1 - 0.26)
for category_position, star_height in enumerate(star_heights):
    for bar_offset in bar_offsets:
        fig.add_annotation(x=round(category_position + bar_offset, 2), y=star_height,
                           text="***",
                           showarrow=False)

fig.update_layout(autosize=False, width=800, height=600)
fig.show()

In [None]:
# Export the cement-floor regression bar chart (the current `fig`) to an HTML file
fig.write_html("cement_regression.html")

In [None]:
# Row labels for the regression table: the first entry labels the sub-header row,
# the remaining five name the dependent variables.
happiness_row_labels = ['variable',
                        'Satisfaction with floor quality',
                        'Satisfaction with house quality',
                        'Satisfaction with life quality',
                        'Depression scale (CES-D scale)',
                        'Perceived stress scale (PSS)']

# Read the saved regression table and attach the labels as an extra column
happiness_regression = pd.read_csv('happiness_regression.csv').assign(
    dependent_variable=happiness_row_labels)

happiness_regression

Unnamed: 0,Control Group,Control Group.1,Model 1,Model 1.1,Model 1.2,Model 2 (no S_rooms),Model 2 (no S_rooms).1,Model 2 (no S_rooms).2,Model 3 (no S_rooms),Model 3 (no S_rooms).1,Model 3 (no S_rooms).2,dependent_variable
0,Mean,Standard Deviation,Coef.,St. Err.,Ratio,Coef.,St. Err.,Ratio,Coef.,St. Err.,Ratio,variable
1,0.51112705,0.5000556941034054,0.21868204036756428,[0.023]***,42.78428198241885,0.2230286047500088,[0.024]***,43.634670225919194,0.2219365591265898,[0.026]***,43.42101579041322,Satisfaction with floor quality
2,0.6051687,0.4889899757418024,0.09160075603526961,[0.021]***,15.136400147965663,0.0869538074865456,[0.021]***,14.368523596014366,0.08407234650559844,[0.022]***,13.892381822669476,Satisfaction with house quality
3,0.60086143,0.4898971906130883,0.11206072316692116,[0.022]***,18.650011057551293,0.11150284835022388,[0.021]***,18.557165221774955,0.11236510539273074,[0.022]***,18.700668698485977,Satisfaction with life quality
4,18.532,9.402,-2.315,[0.616]***,-12.493,-2.417,[0.57]***,-13.043,-2.372,[0.562]***,-12.797,Depression scale (CES-D scale)
5,16.514,6.914,-1.751,[0.428]***,-10.603,-1.769,[0.396]***,-10.71,-1.742,[0.396]***,-10.551,Perceived stress scale (PSS)


In [None]:
# The standard-error cells are stored as text such as '[0.023]***'. Parse the number
# out of each cell instead of retyping the values by hand. Cells that are not
# numeric after stripping the brackets and stars (the 'St. Err.' sub-header) are
# kept as they are; already-numeric cells pass through unchanged.
for se_column in ('Model 1.1', 'Model 2 (no S_rooms).1', 'Model 3 (no S_rooms).1'):
    raw_cells = happiness_regression[se_column]
    parsed_cells = pd.to_numeric(raw_cells.astype(str).str.strip('[]*'), errors='coerce')
    happiness_regression[se_column] = raw_cells.where(parsed_cells.isna(), parsed_cells)

# (label shown in the chart, coefficient column, standard-error column).
# The bars labelled 'Model 2' and 'Model 3' read the '(no S_rooms)' specifications.
happiness_model_columns = [('Model 1', 'Model 1', 'Model 1.1'),
                           ('Model 2', 'Model 2 (no S_rooms)', 'Model 2 (no S_rooms).1'),
                           ('Model 3', 'Model 3 (no S_rooms)', 'Model 3 (no S_rooms).1')]
happiness_value_columns = ['Regression coefficient on program dummy', 'Standard Error']


def _happiness_bars(outcome_column, table_rows):
    """Build the long-format frame behind one grouped bar chart.

    Parameters
    ----------
    outcome_column : str
        Name of the first output column, which holds the dependent-variable label.
    table_rows : iterable of int
        Index labels of the `happiness_regression` rows to include.

    Returns
    -------
    pandas.DataFrame
        One row per (dependent variable, model) with the columns
        [outcome_column, 'Model', 'Regression coefficient on program dummy',
        'Standard Error']; the two value columns are converted to numbers.
    """
    records = [
        [happiness_regression.loc[table_row, 'dependent_variable'],
         model_label,
         happiness_regression.loc[table_row, coef_column],
         happiness_regression.loc[table_row, se_column]]
        for table_row in table_rows
        for model_label, coef_column, se_column in happiness_model_columns
    ]
    # Build the frame once from all records; replaces the DataFrame.append loops
    # (append was removed in pandas 2.0)
    bars = pd.DataFrame(records, columns=[outcome_column, 'Model'] + happiness_value_columns)
    for value_column in happiness_value_columns:
        bars[value_column] = pd.to_numeric(bars[value_column], downcast="float")
    return bars


# Table rows 1-3 are the satisfaction measures, rows 4-5 the depression and stress scales
quality_regression_bar = _happiness_bars('Satisfaction', range(1, 4))
scale_regression_bar = _happiness_bars('Scale', range(4, 6))

In [None]:
fig = px.bar(quality_regression_bar, x="Satisfaction", y="Regression coefficient on program dummy", color="Model", barmode='group', error_y='Standard Error', title="Regressions of Satisfaction Measures on Program Dummy")

# Height of the '***' markers for each satisfaction measure, in x-axis order
star_heights = [0.27, 0.125, 0.15]
# Horizontal offsets of the three model bars around each category position
bar_offsets = (-0.26, 0, 0.26)

# One marker above every bar; round() keeps the coordinates at exactly two decimals
for category_position, star_height in enumerate(star_heights):
    for bar_offset in bar_offsets:
        fig.add_annotation(x=round(category_position + bar_offset, 2), y=star_height,
                           text="***",
                           showarrow=False)

fig.update_layout(autosize=False, width=800, height=600)
fig.show()

In [None]:
# Export the satisfaction regression bar chart (the current `fig`) to an HTML file
fig.write_html("satisfaction_regression.html")

In [None]:
fig = px.bar(scale_regression_bar, x="Scale", y="Regression coefficient on program dummy", color="Model", barmode='group', error_y='Standard Error', title="Regressions of Stress and Depression Measures on Program Dummy")

# Height of the '***' markers for each scale, in x-axis order; the values are
# negative, so the markers are placed at the negative end of the bars
star_heights = [-3.1, -2.5]
# Horizontal offsets of the three model bars around each category position
bar_offsets = (-0.265, 0, 0.265)

# One marker per bar; round() keeps the coordinates at exactly three decimals
for category_position, star_height in enumerate(star_heights):
    for bar_offset in bar_offsets:
        fig.add_annotation(x=round(category_position + bar_offset, 3), y=star_height,
                           text="***",
                           showarrow=False)

fig.update_layout(autosize=False, width=800, height=600)
fig.show()