# <span style="color:darkblue"> Lecture 12a: Analyzing Experiments </span>

<font size = "5">



# <span style="color:darkblue"> I. Import Libraries </span>


In [1]:
# The "pandas" library is used for processing datasets
# The "numpy" library is for numeric observations and random numbers
# The "matplotlib.pyplot" library is for creating graphs

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# We will "alias" two sublibraries in "statsmodels"
# "statsmodels.formula.api" contains functions to estimate models
# "statsmodels.api" contains general-use statistical options

import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

<font size = "5">

Import data

In [2]:
# Read the raw experiment data from a Stata (.dta) file into a DataFrame.
# The path is relative, so it is resolved from the current working directory.
dataset = pd.read_stata("data_raw/malawiexperiment.dta")

In [19]:
# Render the DataFrame as a rich table (pandas truncates long frames when shown)
display(dataset)
# Print plain-text summary statistics (count, mean, std, min, quartiles, max)
print(dataset.describe())

Unnamed: 0,hhid,refchild,envelope_number,de_date,de_start,de_end,survey_version,q5c_school_code,q6_ra_id,q12_day,...,ln50p_invest_std_reg,ln50p_invest_std_miss,ln1_invest_std_reg,ln1_invest_std_miss,ln_invest_std_reg,ln_invest_std_miss,wtp_engl_math,ln10p_wtp_engl_math,ln50p_wtp_engl_math,ln_wtp_engl_math
0,4169,1,2,06/08/2012,16:05:01,16:20:21,74.0,4169,19.0,30,...,-100.000000,1.0,-0.500948,0.0,-100.000000,1.0,-200.0,-0.509496,-0.504247,-0.510826
1,4275,1,10,08/08/2012,10:54:24,11:07:18,74.0,B001,8.0,18,...,-0.070700,0.0,0.658406,0.0,-0.070733,0.0,175.0,2.045208,1.921813,2.079442
2,4086,1,10,08/08/2012,08:56:05,09:09:55,74.0,B001,11.0,18,...,-0.829332,0.0,0.362077,0.0,-0.829403,0.0,200.0,0.250998,0.249737,0.251315
3,4294,2,8,10/08/2012,13:27:16,13:45:03,74.0,B001,36.0,15,...,-100.000000,1.0,-0.012934,0.0,-100.000000,1.0,0.0,0.000000,0.000000,0.000000
4,14112,1,11,01/08/2012,09:36:12,09:51:37,74.0,B001,9.0,14,...,-100.000000,1.0,-100.000000,1.0,-100.000000,1.0,200.0,0.509496,0.504247,0.510826
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5263,9620,2,1,10/08/2012,08:32:23,08:47:04,74.0,P002,10.0,12,...,-0.059132,0.0,0.664002,0.0,-0.059165,0.0,400.0,0.845397,0.837886,0.847298
5264,9628,1,2,07/08/2012,14:53:44,15:10:06,74.0,P002,60.0,14,...,-0.571806,0.0,0.447550,0.0,-0.571861,0.0,0.0,0.000000,0.000000,0.000000
5265,9630,1,2,07/08/2012,15:10:12,15:28:26,74.0,P002,20.0,16,...,-0.817149,0.0,0.365794,0.0,-0.817218,0.0,0.0,0.000000,0.000000,0.000000
5266,9616,1,1,10/08/2012,08:14:29,08:31:51,74.0,P002,63.0,12,...,0.741472,0.0,1.131077,0.0,0.741457,0.0,0.0,0.000000,0.000000,0.000000


               hhid     refchild  envelope_number  part3_33_c  part5_5_13_a  \
count   5268.000000  5268.000000      5268.000000     6.00000    436.000000   
mean    7673.931283     1.500000         4.020881    -6.00000      2.651376   
min        1.000000     1.000000         1.000000    -9.00000     -9.000000   
25%     4723.000000     1.000000         2.000000    -9.00000      2.000000   
50%     6667.500000     1.500000         3.000000    -9.00000      3.000000   
75%     9165.000000     2.000000         5.000000    -2.25000      3.000000   
max    18917.000000     2.000000        15.000000     0.00000     60.000000   
std     5097.328038     0.500047         2.755794     4.64758      4.301031   

       part5_5_15_a  part5_5_17_a  part5_5_13_b  part5_5_15_b  part5_5_17_b  \
count    450.000000    426.000000    434.000000    432.000000    422.000000   
mean       2.453333      2.713615      2.764977      2.407407      2.810427   
min       -9.000000     -9.000000     -9.000000    

# <span style="color:darkblue"> II. Context </span>


<font size = "5">

Today we will review a paper by Rebecca Dizon-Ross published <br>
in the American Economic Review (2019).

- In this study, researchers partnered with local schools in Malawi <br>
- This study evaluated the impacts of information about children’s <br>
 academic performance on parents’ subsequent investments in their <br>
  children’s education.


https://www.povertyactionlab.org/evaluation/effects-student-performance-information-parental-decision-making-malawi?lang=fr

https://www.nber.org/papers/w24610

<font size = "5">

Intervention

- Parents in Malawi with low literacy levels had trouble interpreting <br>
school report cards. Many parents were unaware that their children were <br>
struggling with school.
- The intervention altered the way that the schools engaged with <br>
 parents. It had an impact on reducing the information gaps.


<font size = "5">

Experimental Design

- Students were randomly assigned to treatment and control <br>
with 50% probability
- The random assignment was done at the household level

<font size = "5">

Findings


<img src="figures/treatmenteffects_dizonross.png" alt="drawing" width="650"/>



<font size = "5">

At baseline (before the intervention)

- The graph on the left shows parental beliefs at baseline
- Parents of low performing students thought that they were doing <br>
better than they were. Ideally, beliefs should lie along the 45-degree line
- Similar results for treated and control groups at baseline because <br>
of randomization

At endline (after the intervention)

- Treated parents had more accurate perceptions of their children's <br>
performance
- The treatment effects varied depending on the baseline test scores


# <span style="color:darkblue"> II. Basic Descriptive Analysis </span>


<font size = "5">

Total number of children

In [3]:
# Number of rows in the dataset (first entry of the shape tuple)
dataset.shape[0]

5268

<font size = "5">

Total number of households

- 2 children per household

In [4]:
# Collect the distinct household identifiers, then count them
unique_ids = dataset["hhid"].unique()
len(unique_ids)

2634

<font size = "5">

Calculate number of treated and control

In [5]:
# Tabulate how many observations fall into each value of "treat"
table = pd.crosstab(
    index   = dataset["treat"],
    columns = "count",
)
table

col_0,count
treat,Unnamed: 1_level_1
Control,2654
Treatment,2614


# <span style="color:darkblue"> III. Testing Covariate Balance </span>


<font size = "5">

Subset treated and control observations

In [6]:
# Boolean masks identifying the rows that belong to each experimental arm
is_treated = dataset["treat"] == "Treatment"
is_control = dataset["treat"] == "Control"

# Keep only the matching rows for each group
dataset_treated = dataset[is_treated]
dataset_control = dataset[is_control]

<font size = "5">

Socio-economic information can be collected at baseline <br>
(before the experiment) 

In [7]:
# Column names grouped by category, so that each group can be selected
# at once, e.g. dataset[variables_respondent]
# NOTE(review): the meaning of individual columns is not shown here --
# confirm against the data codebook

# Score variables
variables_scores      = ["ave"]
# Characteristics of the survey respondent
variables_respondent  = ["lit","primary_resp_fem","age_par1","farmer"]
# Characteristics of the household
variables_household   = ["tot_kids","one_par"]
# Characteristics of the student
variables_student     = ["std","age","female","attendance_sv"]

<font size = "5" >

Check that characteristics are similar between treated and control <br>
at baseline

In [8]:
# Show the mean and standard deviation of the respondent variables,
# first for the treated group and then for the control group
for group_data in [dataset_treated, dataset_control]:
    group_summary = group_data[variables_respondent].describe()
    display(group_summary.loc[["mean", "std"]])

Unnamed: 0,lit,primary_resp_fem,age_par1,farmer
mean,0.675613,0.75899,40.97408,0.460587
std,0.468235,0.427778,11.290328,0.498541


Unnamed: 0,lit,primary_resp_fem,age_par1,farmer
mean,0.667426,0.773926,40.645455,0.465544
std,0.471225,0.418366,10.638597,0.498907


<font size = "5">



<font size = "5" >

Conduct a formal test of whether the group means are similar

- We should expect the coefficient on the treatment variable <br>
to be non-significant

In [9]:
# Balance test: regress the covariate "lit" on the treatment indicator.
# cov_type = "HC1" requests heteroskedasticity-robust standard errors
reg_model = smf.ols(formula = "lit ~ treat ", data = dataset)
results   = reg_model.fit(cov_type = "HC1")

# Build the regression table, then print it (stars flag significance levels)
results_table = summary_col(results, stars = True)
print(results_table)


                      lit   
----------------------------
Intercept          0.6674***
                   (0.0092) 
treat[T.Treatment] 0.0082   
                   (0.0130) 
R-squared          0.0001   
R-squared Adj.     -0.0001  
Standard errors in
parentheses.
* p<.1, ** p<.05, ***p<.01


<font size = "5">

Try it yourself!

<font size = "3">

- Obtain summary statistics for the mean and standard deviation for <br>
the other sets of baseline variables


In [10]:
# Write your own code




<font size = "5">

Try it yourself!

<font size = "3">

- Write a loop that runs different regressions of baseline covariates <br>
on the treatment variable. This can help you automate the process of <br>
testing for covariate balance

In [11]:
# Write your own code




# <span style="color:darkblue"> IV. Calculating Average Treatment Effect </span>


- Make sure to use robust standard errors

<font size = "5">

Effect of treatment on endline beliefs

In [12]:
# Regress the outcome "u_ave" on the treatment indicator.
# cov_type = "HC1" requests heteroskedasticity-robust standard errors
reg_model = smf.ols(formula = "u_ave ~ treat ", data = dataset)
results   = reg_model.fit(cov_type = "HC1")

# Build the regression table, then print it (stars flag significance levels)
results_table = summary_col(results, stars = True)
print(results_table)


                     u_ave   
-----------------------------
Intercept          63.5628***
                   (0.3435)  
treat[T.Treatment] -7.4218***
                   (0.4988)  
R-squared          0.0406    
R-squared Adj.     0.0404    
Standard errors in
parentheses.
* p<.1, ** p<.05, ***p<.01


<font size = "5">

Add baseline covariates

In [13]:
# Same regression of "u_ave" on treatment, now controlling for "ave".
# cov_type = "HC1" requests heteroskedasticity-robust standard errors
reg_model = smf.ols(formula = "u_ave ~ treat + ave ", data = dataset)
results   = reg_model.fit(cov_type = "HC1")

# Build the regression table, then print it (stars flag significance levels)
results_table = summary_col(results, stars = True)
print(results_table)


                     u_ave   
-----------------------------
Intercept          39.6549***
                   (0.6801)  
treat[T.Treatment] -7.0564***
                   (0.4325)  
ave                0.5079*** 
                   (0.0129)  
R-squared          0.2725    
R-squared Adj.     0.2722    
Standard errors in
parentheses.
* p<.1, ** p<.05, ***p<.01


<font size = "5">

Test for heterogeneous effects


In [14]:
# Allow the treatment effect to vary with "ave" by adding an interaction.
# In the formula syntax "treat*ave" expands to treat + ave + treat:ave,
# so the main effects appear once alongside the interaction term.
# cov_type = "HC1" requests heteroskedasticity-robust standard errors
reg_model = smf.ols(formula = "u_ave ~ treat + ave + treat*ave", data = dataset)
results   = reg_model.fit(cov_type = "HC1")

# Build the regression table, then print it (stars flag significance levels)
results_table = summary_col(results, stars = True)
print(results_table)


                          u_ave   
----------------------------------
Intercept              49.1885*** 
                       (0.8840)   
treat[T.Treatment]     -25.9979***
                       (1.2381)   
ave                    0.3054***  
                       (0.0177)   
treat[T.Treatment]:ave 0.4055***  
                       (0.0241)   
R-squared              0.3095     
R-squared Adj.         0.3091     
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01


<font size = "5">

Try it yourself!

Test for heterogeneous effects using other baseline covariates!

In [15]:
# Write your own code




