# Replication Notebooks
[The Impact of High School Financial Education: Evidence from a Large-Scale Evaluation in Brazil]( https://doi.org/10.1257/app.20150149.) 

By [Charlie Zhang](mailto:charlie.zhang@georgetown.edu)

In [1]:
import os
os.chdir("../")

import pandas as pd
import numpy as np
import statsmodels as sm
import statsmodels.formula.api as smf
import scipy

import warnings
warnings.filterwarnings("ignore")

In [2]:
panel_final = os.getcwd() + "/Data/school_intervention_panel_final.dta"

In [3]:
pf = pd.read_stata(panel_final)
pf.head(5)

Unnamed: 0,id_geral,cd_escola,nm_uf_bl,matriculas,docentes,abandonona1sriemdio,aprovaona1sriemdio,treatment,pair_all,treatment_workshop,...,dumm_rp_18p_fup,dumm_rp_19p_fup,dumm_rp_21p_fup,dumm_rp_23p_fup,dumm_formal_saving_fup,dumm_rp_33p_fup,dumm_rp_34p_fup,dumm_rp_36p_fup,dumm_rp_37p_fup,dumm_rp_41p_fup
0,1,17002648.0,TOCANTINS,273.0,34.0,6.8,76.7,yes,17017.0,,...,,,,,,,,,,
1,1,17002648.0,TOCANTINS,273.0,34.0,6.8,76.7,yes,17017.0,,...,,,,,,,,,,
2,10,17018390.0,TOCANTINS,641.0,29.0,4.2,90.7,no,17008.0,,...,1.0,1.0,,,1.0,1.0,0.0,0.0,0.0,
3,10,17018390.0,TOCANTINS,641.0,29.0,4.2,90.7,no,17008.0,,...,,,,,,,,,,
4,100,33002614.0,RIO DE JANEIRO,199.0,35.0,13.3,80.0,yes,33031.0,0.0,...,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,


In [4]:
# Line 41-43
pf = (pf[(pf["round"] == "yes")& (pf["treatment"].isna() == False)]
      .sort_values(by=["cd_escola", "id_geral"])
      .reset_index()
      .drop("index", axis=1))
pf.head(5)

Unnamed: 0,id_geral,cd_escola,nm_uf_bl,matriculas,docentes,abandonona1sriemdio,aprovaona1sriemdio,treatment,pair_all,treatment_workshop,...,dumm_rp_18p_fup,dumm_rp_19p_fup,dumm_rp_21p_fup,dumm_rp_23p_fup,dumm_formal_saving_fup,dumm_rp_33p_fup,dumm_rp_34p_fup,dumm_rp_36p_fup,dumm_rp_37p_fup,dumm_rp_41p_fup
0,10166,17000386.0,TOCANTINS,414.0,42.0,9.6,65.2,yes,17006.0,,...,,,,,,,,,,
1,10439,17000386.0,TOCANTINS,414.0,42.0,9.6,65.2,yes,17006.0,,...,,,,,,,,,,
2,10448,17000386.0,TOCANTINS,414.0,42.0,9.6,65.2,yes,17006.0,,...,,,,,,,,,,
3,10568,17000386.0,TOCANTINS,414.0,42.0,9.6,65.2,yes,17006.0,,...,,,,,,,,,,
4,13397,17000386.0,TOCANTINS,414.0,42.0,9.6,65.2,yes,17006.0,,...,,,,,,,,,,


## Table 1

In [5]:
test = [
    "female", "dumm_rp_08_bl", "dumm_rp_09_bl", "dumm_rp_24_bl",
    "dumm_rp_14_bl", "dumm_rp_23_bl", "vl_proficiencia_bl"
]
aluno = [
    "dumm_rp_49_bl", "dumm_rp_50_bl", "dumm_rp_65A_bl", "poupar_final2_bl",
    "dumm_rp_64A_bl", "dumm_negotiates_bl", "autonomia_final2_bl"
]
school = [
    "matriculas", "docentes", "abandonona1sriemdio", "aprovaona1sriemdio"
]
xvars = test + aluno

In [8]:
def ttest(stats, alpha=0.05):

    mean_1, mean_2 = stats[(var, "mean")]
    std_1, std_2 = stats[(var, "std")]
    n_1, n_2 = stats[(var, "count")]
    se_1, se_2 = std_1 / np.sqrt(n_1), std_2 / np.sqrt(n_2)
    sed = np.sqrt(se_1**2.0 + se_1**2.0)
    t_stat = (mean_1 - mean_2) / sed
    df = n_1 + n_2 - 2
    cv = scipy.stats.t.ppf(1.0 - alpha, df)
    p = (1.0 - scipy.stats.t.cdf(abs(t_stat), df)) * 2.0
    return p

In [9]:
t1p1 = pd.DataFrame()
for var in school:
    temp = ["treatment", "cd_escola"]
    temp.append(var)
    pf_temp = (pf[temp].drop_duplicates()[["treatment", var]])
    stats = pf_temp.groupby("treatment").agg({var: ["mean", "std", "count"]})
    stats[(var, "t-test")] = ttest(stats)
    t1p1 = pd.concat([t1p1, stats], axis=1).round(4)

display(t1p1)

Unnamed: 0_level_0,matriculas,matriculas,matriculas,matriculas,docentes,docentes,docentes,docentes,abandonona1sriemdio,abandonona1sriemdio,abandonona1sriemdio,abandonona1sriemdio,aprovaona1sriemdio,aprovaona1sriemdio,aprovaona1sriemdio,aprovaona1sriemdio
Unnamed: 0_level_1,mean,std,count,t-test,mean,std,count,t-test,mean,std,count,t-test,mean,std,count,t-test
treatment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
no,642.8363,461.6997,452,0.2156,37.4949,24.0912,392,0.6504,11.0351,11.2233,447,0.3488,68.051,16.0285,447,0.7751
yes,680.8932,514.618,440,0.2156,38.2751,25.7419,378,0.6504,11.7389,11.8174,435,0.3488,67.7446,15.9359,435,0.7751


In [10]:
t1p2 = pd.DataFrame()
for var in xvars:
    temp = ["treatment", "id_geral"]
    temp.append(var)
    pf_temp = (pf[temp].drop_duplicates()[["treatment", var]])
    stats = pf_temp.groupby("treatment").agg({var: ["mean", "std", "count"]})
    stats[(var, "t-test")] = ttest(stats)
    t1p2 = pd.concat([t1p2, stats], axis=1).round(3)

display(t1p2.T)

Unnamed: 0,treatment,no,yes
female,mean,0.549,0.563
female,std,0.498,0.496
female,count,11954.0,11529.0
female,t-test,0.034,0.034
dumm_rp_08_bl,mean,0.446,0.435
dumm_rp_08_bl,std,0.497,0.496
dumm_rp_08_bl,count,11774.0,11350.0
dumm_rp_08_bl,t-test,0.111,0.111
dumm_rp_09_bl,mean,0.414,0.405
dumm_rp_09_bl,std,0.493,0.491


## Table 2