In [1]:
# Initialise the Stata-Python bridge before any %%stata cell is run.
# NOTE(review): pandas is imported here but is not referenced in any cell
# visible in this notebook export.
import stata_setup 
import pandas as pd
# Arguments: the local Stata installation directory and the edition to load
# ("se"); the path is machine-specific and must match your own installation.
stata_setup.config("/Applications/STATA","se")


  ___  ____  ____  ____  ____ ®
 /__    /   ____/   /   ____/      18.0
___/   /   /___/   /   /___/       SE—Standard Edition

 Statistics and Data Science       Copyright 1985-2023 StataCorp LLC
                                   StataCorp
                                   4905 Lakeway Drive
                                   College Station, Texas 77845 USA
                                   800-STATA-PC        https://www.stata.com
                                   979-696-4600        stata@stata.com

Stata license: Unlimited-user network, expiring 31 Aug 2024
Serial number: 401809300159
  Licensed to: Bruno Komel
               University of Pittsburgh

Notes:
      1. Unicode is supported; see help unicode_advice.
      2. Maximum number of variables is set to 5,000 but can be increased;
          see help set_maxvar.


# RAND Experiment

In [2]:
%%stata

* Load rand_initial_sample.dta directly from GitHub; -clear- discards any
* dataset that is currently in memory.
use "https://github.com/brunokomel/econ-3080-recitations/raw/main/Recitation%201%20-%20RCTs%20(Balance%20Tests)/rand_initial_sample.dta", clear


* Plan types:
* (legend for the plan-type codes used in the cells that follow)
/* 
	Plan type 1 = "Free plan"
	Plan type 2 = "Deductible plan"
	Plan type 3 = "Coinsurance plan"
	Plan type 4 = "Catastrophic plan" or "No Insurance"
*/



. 
. use "https://github.com/brunokomel/econ-3080-recitations/raw/main/Recitation%
> 201%20-%20RCTs%20(Balance%20Tests)/rand_initial_sample.dta", clear

. 
. 
. * Plan types:
. /* 
>         Plan type 1 = "Free plan"
>         Plan type 2 = "Deductible plan"
>         Plan type 3 = "Coinsurance plan"
>         Plan type 4 = "Catastrophic plan" or "No Insurance"
> */
. 


In [3]:
%%stata

* Column 1: means (standard deviations underneath) for the catastrophic plan.
* means_sd is 11 x 2 and starts out entirely missing: rows 1-10 hold one
* characteristic each (col 1 = mean, col 2 = sd); row 11 holds the group size.
matrix means_sd = J(11, 2, .)

* Fill rows 1-10 from -summarize- restricted to plan type 4.
local i = 1
foreach v of varlist female blackhisp age educper income1cpi hosp ghindx cholest systol mhi {
	summarize `v' if plantype == 4
	matrix means_sd[`i', 1] = r(mean)
	matrix means_sd[`i', 2] = r(sd)
	local ++i
}

* Row 11, column 1: number of observations flagged plantype_4.
count if plantype_4 == 1
matrix means_sd[11, 1] = r(N)

* Name the rows after the variables so that -varlabels- can title them.
* The last row is named plantype although it stores a count.
matrix rownames means_sd = female blackhisp age educper income1cpi hosp ghindx cholest systol mhi plantype
matrix list means_sd

* Format the table. Options used:
*   statmat(.)  the matrix that supplies the statistics
*   substat(1)  each statistic has one substatistic printed beneath it (the sd)
*   varlabels   title rows with the labels of the variables named in rownames
*   sdec(4)     show four decimal places
*   ctitle(.,.) column titles
*   replace     used here on the first column; the later columns use -merge-,
*               which joins the new output to the most recent table
*               NOTE(review): confirm in the frmttable help whether -replace-
*               governs the stored table or only an existing output file
#delimit ;
frmttable, statmat(means_sd) substat(1) varlabels sdec(4)
	ctitle("", "Cata. mean") replace;
#delimit cr


. 
. * Create means for catastrophic plan
. matrix means_sd = J(11, 2, .)

. local row = 1

. 
. foreach var of varlist female blackhisp age educper income1cpi hosp ghindx ch
> olest systol mhi {
  2.         summarize `var' if plantype == 4
  3.         matrix means_sd[`row', 1] = r(mean)
  4.         matrix means_sd[`row', 2] = r(sd)
  5.         local row = `row'+1
  6. }

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
      female |        759    .5599473    .4967206          0          1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
   blackhisp |        600    .1716667    .3774051          0          1

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
         age |        759      32.361    1

In [4]:
%%stata

* Regression-based differences
* Column 2: deductible plan compared to catastrophic plan.
* deduct_diff is 11 x 2: rows 1-10 hold the plantype_2 coefficient (col 1) and
* its standard error (col 2) for each characteristic; row 11 holds the group size.
matrix deduct_diff = J(11, 2, .)

* Each characteristic is regressed on the plan dummies 1-3, with standard
* errors clustered on famid.
local i = 1
foreach v of varlist female blackhisp age educper income1cpi hosp ghindx cholest systol mhi {
	regress `v' plantype_1 plantype_2 plantype_3, vce(cluster famid)
	matrix deduct_diff[`i', 1] = _b[plantype_2]
	matrix deduct_diff[`i', 2] = _se[plantype_2]
	local ++i
}

* Row 11, column 1: number of observations on the deductible plan.
count if plantype_2 == 1
matrix deduct_diff[11, 1] = r(N)

* -merge- adds this column to the table produced by the previous frmttable call.
#delimit ;
frmttable, statmat(deduct_diff) varlabels sdec(4)
	ctitle("Deduct - cata.") substat(1) merge;
#delimit cr


. 
. * Create regression output
. * Column 2: Deductible plan compared to catastrophic plan
. matrix deduct_diff = J(11, 2, .)

. local row = 1

. 
. foreach var of varlist female blackhisp age educper income1cpi hosp ghindx ch
> olest systol mhi {
  2.         reg `var' plantype_1 plantype_2 plantype_3, cl(famid)
  3.         matrix deduct_diff[`row', 1] = _b[plantype_2]
  4.         matrix deduct_diff[`row', 2] = _se[plantype_2]
  5.         local row = `row'+1
  6. }

Linear regression                               Number of obs     =      3,957
                                                F(3, 1982)        =       2.14
                                                Prob > F          =     0.0935
                                                R-squared         =     0.0007
                                                Root MSE          =     .49878

                              (Std. err. adjusted for 1,983 clusters in famid)
------------------------------------------------


Linear regression                               Number of obs     =      3,817
                                                F(3, 1940)        =       1.46
                                                Prob > F          =     0.2228
                                                R-squared         =     0.0016
                                                Root MSE          =     13.846

                              (Std. err. adjusted for 1,941 clusters in famid)
------------------------------------------------------------------------------
             |               Robust
         mhi | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
-------------+----------------------------------------------------------------
  plantype_1 |   .8900588   .7652742     1.16   0.245    -.6107875    2.390905
  plantype_2 |  -.1201439       .821    -0.15   0.884    -1.730279    1.489991
  plantype_3 |   1.189266   .8094931     1.47   0.142    -.3983018    2.776834
       _cons |

# Exercise


In [5]:
%%stata

* Column 3: coinsurance plan compared to catastrophic plan.
* coins_diff is 11 x 2: coefficient on plantype_3 (col 1) and its standard
* error (col 2) per characteristic; row 11 holds the group size.
matrix coins_diff = J(11, 2, .)

* Walk through the characteristics by position so the loop index doubles as
* the matrix row.
local covars female blackhisp age educper income1cpi hosp ghindx cholest systol mhi
local k : word count `covars'
forvalues i = 1/`k' {
	local y : word `i' of `covars'
	regress `y' plantype_1 plantype_2 plantype_3, vce(cluster famid)
	matrix coins_diff[`i', 1] = _b[plantype_3]
	matrix coins_diff[`i', 2] = _se[plantype_3]
}

* Row 11, column 1: number of observations on the coinsurance plan.
count if plantype_3 == 1
matrix coins_diff[11, 1] = r(N)

* -merge- adds this column to the table built by the earlier frmttable calls.
#delimit ;
frmttable, statmat(coins_diff) varlabels sdec(4)
	ctitle("Coins - cata") substat(1) merge;
#delimit cr


. 
. * Column 3: Coinsurance plan compared to catastrophic plan
. matrix coins_diff = J(11, 2, .)

. local row = 1

. 
. foreach var of varlist female blackhisp age educper income1cpi hosp ghindx ch
> olest systol mhi {
  2.         reg `var' plantype_1 plantype_2 plantype_3, cl(famid)
  3.         matrix coins_diff[`row', 1] = _b[plantype_3]
  4.         matrix coins_diff[`row', 2] = _se[plantype_3]
  5.         local row = `row'+1
  6. }

Linear regression                               Number of obs     =      3,957
                                                F(3, 1982)        =       2.14
                                                Prob > F          =     0.0935
                                                R-squared         =     0.0007
                                                Root MSE          =     .49878

                              (Std. err. adjusted for 1,983 clusters in famid)
------------------------------------------------------------------------------



Linear regression                               Number of obs     =      3,817
                                                F(3, 1940)        =       1.46
                                                Prob > F          =     0.2228
                                                R-squared         =     0.0016
                                                Root MSE          =     13.846

                              (Std. err. adjusted for 1,941 clusters in famid)
------------------------------------------------------------------------------
             |               Robust
         mhi | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
-------------+----------------------------------------------------------------
  plantype_1 |   .8900588   .7652742     1.16   0.245    -.6107875    2.390905
  plantype_2 |  -.1201439       .821    -0.15   0.884    -1.730279    1.489991
  plantype_3 |   1.189266   .8094931     1.47   0.142    -.3983018    2.776834
       _cons |

In [6]:
%%stata

* Column 4: Free plan compared to catastrophic plan
* (The coefficient extracted below is the one on plantype_1, and the column is
*  titled "Free - cata."; the coinsurance comparison is column 3.)
* free_diff is 11 x 2: coefficient (col 1) and standard error (col 2) per
* characteristic; row 11 holds the group size.
matrix free_diff = J(11, 2, .)
local row = 1

* Each characteristic is regressed on the plan dummies 1-3, with standard
* errors clustered on famid.
foreach var of varlist female blackhisp age educper income1cpi hosp ghindx cholest systol mhi {
	reg `var' plantype_1 plantype_2 plantype_3, cl(famid)
	matrix free_diff[`row', 1] = _b[plantype_1]
	matrix free_diff[`row', 2] = _se[plantype_1]
	local row = `row'+1
}

* Row 11, column 1: number of observations on the free plan.
count if plantype_1 == 1
matrix free_diff[11, 1] = r(N)

* -merge- adds this column to the table built by the earlier frmttable calls.
#d ;
frmttable, statmat(free_diff) varlabels sdec(4)
		   ctitle("Free - cata.") substat(1) merge;
#d cr



. 
. * Column 4: Coinsurance plan compared to catastrophic plan
. matrix free_diff = J(11, 2, .)

. local row = 1

. 
. foreach var of varlist female blackhisp age educper income1cpi hosp ghindx ch
> olest systol mhi {
  2.         reg `var' plantype_1 plantype_2 plantype_3, cl(famid)
  3.         matrix free_diff[`row', 1] = _b[plantype_1]
  4.         matrix free_diff[`row', 2] = _se[plantype_1]
  5.         local row = `row'+1
  6. }

Linear regression                               Number of obs     =      3,957
                                                F(3, 1982)        =       2.14
                                                Prob > F          =     0.0935
                                                R-squared         =     0.0007
                                                Root MSE          =     .49878

                              (Std. err. adjusted for 1,983 clusters in famid)
------------------------------------------------------------------------------
   


Linear regression                               Number of obs     =      2,254
                                                F(3, 1175)        =       1.38
                                                Prob > F          =     0.2481
                                                R-squared         =     0.0022
                                                Root MSE          =     42.872

                              (Std. err. adjusted for 1,176 clusters in famid)
------------------------------------------------------------------------------
             |               Robust
     cholest | Coefficient  std. err.      t    P>|t|     [95% conf. interval]
-------------+----------------------------------------------------------------
  plantype_1 |  -5.246336   2.701373    -1.94   0.052    -10.54639    .0537178
  plantype_2 |  -1.420108    2.98409    -0.48   0.634    -7.274848    4.434632
  plantype_3 |  -1.931604   2.758279    -0.70   0.484    -7.343305    3.480097
       _cons |

In [7]:
%%stata

* Column 5: any insurance plan compared to catastrophic plan.
* any_diff is 11 x 2: coefficient on any_ins (col 1) and its standard error
* (col 2) per characteristic; row 11 holds the group size.
matrix any_diff = J(11, 2, .)

* One bivariate regression per characteristic, clustered on famid.
local i = 1
foreach v of varlist female blackhisp age educper income1cpi hosp ghindx cholest systol mhi {
	regress `v' any_ins, vce(cluster famid)
	matrix any_diff[`i', 1] = _b[any_ins]
	matrix any_diff[`i', 2] = _se[any_ins]
	local ++i
}

* Row 11, column 1: number of observations with any_ins equal to 1.
count if any_ins == 1
matrix any_diff[11, 1] = r(N)

* -merge- adds this column to the table built by the earlier frmttable calls.
#delimit ;
frmttable, statmat(any_diff) varlabels sdec(4)
	ctitle("Any - cata.") substat(1) merge;
#delimit cr


. 
. * Column 5: Any insurance plan compared to catastrophic plan
. matrix any_diff = J(11, 2, .)

. local row = 1

. 
. foreach var of varlist female blackhisp age educper income1cpi hosp ghindx ch
> olest systol mhi {
  2.         reg `var' any_ins, cl(famid)
  3.         matrix any_diff[`row', 1] = _b[any_ins]
  4.         matrix any_diff[`row', 2] = _se[any_ins]
  5.         local row = `row'+1
  6. }

Linear regression                               Number of obs     =      3,957
                                                F(1, 1982)        =       5.11
                                                Prob > F          =     0.0240
                                                R-squared         =     0.0005
                                                Root MSE          =     .49869

                              (Std. err. adjusted for 1,983 clusters in famid)
------------------------------------------------------------------------------
             |               Robust

# Miguel & Kremer

Install the randomize package from SSC (this only needs to be done once):
ssc install randomize

First example: Miguel & Kremer (Econometrica, 2004)

Note: You can obtain the dataset and replication code from https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/28038. The file namelist.dta is needed for this exercise.

Remember to set your working directory correctly using the "cd..." command

In [None]:
* Set the working directory. The cells below open namelist.dta by relative
* path, so that file must be in this folder. The path is specific to the
* author's machine; replace it with your own location.
cd "~/Documents/Pitt/Year_2/TA - Econ 3080/Recitations/Recitation 3"

In [None]:
* Load the name list from the working directory.
use namelist.dta, clear

* Keep visit 981 only, then reduce the data to one row per school (sch98v1):
* the listed variables become school means and np counts the non-missing
* pupid values, which later serves as the weight.
keep if visit == 981
collapse (mean) sex elg98 stdgap yrbirth wgrp* (count) np=pupid, by(sch98v1)

In [None]:
**** TABLE 1: PANEL A
* Summary statistics of the four characteristics within each treatment group
* (wgrp), weighting every school by its pupil count np.
bysort wgrp: summarize sex elg98 stdgap yrbirth [aweight = np]

* Balance regressions: each characteristic on the group 1 and group 2 dummies,
* using the same weights.
foreach outcome in sex elg98 stdgap yrbirth {
	regress `outcome' wgrp1 wgrp2 [aweight = np]
}

In [None]:
* Randomly assign the observations in memory to 3 groups; the assignment is
* stored in the new variable grp.
* NOTE(review): no seed is set in this cell, so the assignment may differ from
* run to run -- confirm whether that is intended.
randomize, groups(3) generate(grp)

*Note: We can check the balance of this grp variable as follows: 
bys grp: summ sex elg98 stdgap yrbirth [aw=np] //sort by the new group variable, then summarise these variables within each group 

* Indicator variables for groups 1 and 2 (no dummy is created for group 3)
gen grp1 = (grp == 1) //creating dummies for each group category
gen grp2 = (grp == 2)

* Regress each characteristic on the two group dummies, weighted by np
foreach var in sex elg98 stdgap yrbirth { 
	regress `var' grp1 grp2 [aw=np] 
} 

In [None]:
// Another example: sysuse nlsw88 //
* nlsw88 is another preloaded dataset (similar to auto.dta), from the National
* Longitudinal Survey of Women in 88. -clear- drops the data currently in memory.
sysuse nlsw88.dta, clear

* Indicator equal to 1 when race is coded 2.
generate black = (race == 2)

* Assign observations at random to two groups (stored in grp), then compare
* the groups on four characteristics.
randomize, groups(2) generate(grp)
bysort grp: summarize age black married collgrad

In [None]:
* Randomise into two groups again, this time blocking on black, and store the
* assignment in grp_alt; then compare the groups on the same characteristics.
randomize, generate(grp_alt) groups(2) block(black)
bysort grp_alt: summarize age black married collgrad

# Exercise: Recreate Panel A in Table I in Miguel & Kremer

In [None]:
* Rebuild the school-level dataset: load the name list, keep visit 981, and
* collapse to one row per school (sch98v1) with school means and the count of
* non-missing pupid values (np).
use namelist.dta, clear
keep if visit == 981
collapse (mean) sex elg98 stdgap yrbirth wgrp* (count) np=pupid, by(sch98v1)

* Variable labels; frmttable's -varlabels- option uses them as row titles.
label variable sex     "Male"
label variable elg98   "Proportion girls"
label variable stdgap  "Grade"
label variable yrbirth "Year of Birth"

In [None]:
* Columns 1-3 of Panel A, written to Table1.tex: the np-weighted mean of each
* characteristic within treatment groups 1, 2 and 3.
* Start from a clean slate: drop all Stata matrices and clear Mata memory
* (presumably this also resets any table frmttable kept from earlier calls --
* confirm against the frmttable help).
matrix drop _all
mata: mata clear

forvalues g = 1/3 {

	* 4 x 2 matrix of missings. Only column 1 (the mean) is filled below;
	* column 2 is left missing, so the substat(1) row prints blank
	* (presumably to line up with standard-error rows merged in later).
	matrix mean_dep_`g' = J(4, 2, .)

	* The row names never change, so set them once per group instead of once
	* per variable inside the loop.
	matrix rownames mean_dep_`g' = sex elg98 stdgap yrbirth

	local i = 1
	foreach var of varlist sex elg98 stdgap yrbirth {
		sum `var' [aw = np] if wgrp == `g'
		matrix mean_dep_`g'[`i', 1] = r(mean)
		local i = `i' + 1
	}

	* -tex- makes frmttable write TeX; without it the output file is not TeX
	* even though its name ends in .tex. -merge- adds each group as a new
	* column.
	frmttable using "Table1.tex", statmat(mean_dep_`g') substat(1) ctitle("", "Group `g'") varlabels merge tex

}

In [None]:
* Start from a clean slate: drop all Stata matrices and clear Mata memory
* (presumably this also resets any table frmttable kept from earlier calls --
* confirm against the frmttable help).
matrix drop _all
mata: mata clear

*Columns 1-3
* np-weighted mean of each characteristic within treatment groups 1, 2 and 3,
* added to the displayed table one group (column) at a time.
forvalues g = 1/3 {

	* 4 x 2 matrix of missings. Only column 1 (the mean) is filled below;
	* column 2 is left missing, so the substat(1) row prints blank
	* (presumably to line up with standard-error rows merged in later).
	matrix mean_dep_`g' = J(4, 2, .)

	* The row names never change, so set them once per group instead of once
	* per variable inside the loop.
	matrix rownames mean_dep_`g' = sex elg98 stdgap yrbirth

	local i = 1
	foreach var of varlist sex elg98 stdgap yrbirth {
		summ `var' [aw=np] if wgrp == `g'
		matrix mean_dep_`g'[`i', 1] = r(mean)
		local i = `i' + 1
	}

	* -merge- adds this group's column to the table from the previous call.
	frmttable, statmat(mean_dep_`g') substat(1) ctitle("","Group `g'")  varlabels merge
}

In [None]:
* Column 4
* Coefficient on wgrp1 (col 1) and its standard error (col 2) from a weighted
* regression of each characteristic on wgrp1 and wgrp2; the column is titled
* "Group 1 - Group 3".
matrix control_diff_1 = J(4, 2, .)

* Walk through the characteristics by position so the loop index doubles as
* the matrix row.
local outcomes sex elg98 stdgap yrbirth
local k : word count `outcomes'
forvalues i = 1/`k' {
	local y : word `i' of `outcomes'
	regress `y' wgrp1 wgrp2 [aweight = np]
	matrix control_diff_1[`i', 1] = _b[wgrp1]
	matrix control_diff_1[`i', 2] = _se[wgrp1]
}

matrix list control_diff_1

* -merge- adds this column to the table built by the earlier frmttable calls.
frmttable, statmat(control_diff_1) substat(1) ctitle("Group 1 - Group 3") merge

In [None]:
* Column 5
* Coefficient on wgrp2 (col 1) and its standard error (col 2) from a weighted
* regression of each characteristic on wgrp1 and wgrp2; the column is titled
* "Group 2 - Group 3".
matrix control_diff_2 = J(4, 2, .)

* Walk through the characteristics by position so the loop index doubles as
* the matrix row.
local outcomes sex elg98 stdgap yrbirth
local k : word count `outcomes'
forvalues i = 1/`k' {
	local y : word `i' of `outcomes'
	regress `y' wgrp1 wgrp2 [aweight = np]
	matrix control_diff_2[`i', 1] = _b[wgrp2]
	matrix control_diff_2[`i', 2] = _se[wgrp2]
}

matrix list control_diff_2

* -merge- adds this column to the table built by the earlier frmttable calls.
frmttable, statmat(control_diff_2) substat(1) ctitle("Group 2 - Group 3")  merge