In [1]:
import stata_setup
import pandas as pd
stata_setup.config("/Applications/STATA","se")


  ___  ____  ____  ____  ____ ®
 /__    /   ____/   /   ____/      18.0
___/   /   /___/   /   /___/       SE—Standard Edition

 Statistics and Data Science       Copyright 1985-2023 StataCorp LLC
                                   StataCorp
                                   4905 Lakeway Drive
                                   College Station, Texas 77845 USA
                                   800-STATA-PC        https://www.stata.com
                                   979-696-4600        stata@stata.com

Stata license: Unlimited-user network, expiring 31 Aug 2024
Serial number: 401809300159
  Licensed to: Bruno Komel
               University of Pittsburgh

Notes:
      1. Unicode is supported; see help unicode_advice.
      2. Maximum number of variables is set to 5,000 but can be increased;
          see help set_maxvar.


In [4]:
%%stata

global data  "/Users/brunokomel/Documents/Pitt/Year 3/TA - Econ 3080/econ-3080-recitations"
global working  "/Users/brunokomel/Documents/Pitt/Year 3/TA - Econ 3080/econ-3080-recitations/Recitation 2 - DiD"


. 
. global data  "/Users/brunokomel/Documents/Pitt/Year 3/TA - Econ 3080/econ-308
> 0-recitations"

. global working  "/Users/brunokomel/Documents/Pitt/Year 3/TA - Econ 3080/econ-
> 3080-recitations/Recitation 2 - DiD"

. 


In [5]:
%%stata

cd "${data}"


. 
. cd "${data}"
/Users/brunokomel/Documents/Pitt/Year 3/TA - Econ 3080/econ-3080-recitations

. 


In [6]:
%%stata

set scheme gg_tableau


. 
. set scheme gg_tableau

. 


In [7]:
%%stata

# Import data */
infile SHEET CHAIN CO_OWNED STATE SOUTHJ CENTRALJ NORTHJ PA1 PA2      ///
       SHORE NCALLS EMPFT EMPPT NMGRS WAGE_ST INCTIME FIRSTINC BONUS  ///
       PCTAFF MEALS OPEN HRSOPEN PSODA PFRY PENTREE NREGS NREGS11     ///
       TYPE2 STATUS2 DATE2 NCALLS2 EMPFT2 EMPPT2 NMGRS2 WAGE_ST2      ///
       INCTIME2 FIRSTIN2 SPECIAL2 MEALS2 OPEN2R HRSOPEN2 PSODA2 PFRY2 ///
       PENTREE2 NREGS2 NREGS112 using "public.dat", clear

# Label the state variables and values */
label var STATE "State"
label define state_labels 0 "PA" 1 "NJ"
label values STATE state_labels

# Calculate FTE employement */
gen FTE  = EMPFT  + 0.5 * EMPPT  + NMGRS
label var FTE  "FTE employment before"
gen FTE2 = EMPFT2 + 0.5 * EMPPT2 + NMGRS2
label var FTE2 "FTE employment after"

gen dif = FTE - FTE2

SystemError: 
. 
. # Import data */
Unknown #command
. infile SHEET CHAIN CO_OWNED STATE SOUTHJ CENTRALJ NORTHJ PA1 PA2      ///
>        SHORE NCALLS EMPFT EMPPT NMGRS WAGE_ST INCTIME FIRSTINC BONUS  ///
>        PCTAFF MEALS OPEN HRSOPEN PSODA PFRY PENTREE NREGS NREGS11     ///
>        TYPE2 STATUS2 DATE2 NCALLS2 EMPFT2 EMPPT2 NMGRS2 WAGE_ST2      ///
>        INCTIME2 FIRSTIN2 SPECIAL2 MEALS2 OPEN2R HRSOPEN2 PSODA2 PFRY2 ///
>        PENTREE2 NREGS2 NREGS112 using "public.dat", clear
file public.dat not found
r(601);
r(601);


In [None]:
%%stata

cd "$working"

save working_data, replace

In [None]:
%%stata -doutd df1

use working_data.dta, clear

In [None]:
display(df1)

In [None]:
%%stata

tabstat FTE FTE2, by(STATE) stat(mean semean)

In [None]:
%%stata

## Editing the data so we can match the paper
expand 2 
## This creates a dublciate of each observation and it appends it to the bottom of the dataset


In [None]:
%%stata

cd "$working"

save working_data, replace

In [None]:
%%stata -doutd df1

use working_data.dta, clear

In [None]:
display(df1)

In [None]:
%%stata

gen id = _n 

## Since the data is compiled with one observation per "sheet" or store, 
## I want to separate them between prior and post treatment

In [None]:
%%stata

gen after = 1 if id > _N/2 
## creating the "after" treatment indicator
replace after = 0 if id <= _N/2


In [None]:
%%stata

tab STATE, gen(state) 
## this will create indicator variables for each state

In [None]:
%%stata

rename state2 nj 
## naming one variable after New Jersey

In [None]:
%%stata

gen njafter = after*nj 
## creating an interaction term

In [None]:
%%stata


gen fte = FTE 
#I don't like capital letters and I want this new "fte" variable to disagreggate
#the FTE and FTE2 variables into different observations



In [None]:
%%stata

replace fte = FTE2 if after ==1 

In [None]:
%%stata

## Now we can do Diff-in-Diff analyses:

reg fte nj after njafter, robust 
## The traditional specification

In [None]:
%%stata

reg fte njafter nj after, cluster(SHEET) 
## Clustering Standard Errors by store

In [None]:
%%stata

reg dif nj after nj#after, robust 
## An alternative way to run this regression

In [None]:
%%stata

reg dif nj after njafter, robust 
##A little cleaner way to do the same thing as above

In [None]:
%%stata

## ssc install diff
diff fte, t(nj) p(after)

In [None]:
%%stata

save working_data, replace

In [None]:
%%stata

##preserve 

##use working_data
##qui reg fte nj after njafter, robust

##collapse (mean) fte, by(nj after)
##save working_data_did, replace


use working_data_did.dta

In [None]:
%%stata

twoway (connected fte after if nj ==1, color(blue)) (connected fte after if nj ==0, color(red)), xline(0.5) ///
legend(label(1 NJ - Treatment) label(2 PA - Control)) 
save working_data_did, replace

In [None]:
%%stata
clear
use working_data.dta

quietly reg fte nj after njafter, robust 
##I'm just running this because I want to store one of the coefficients
gen fte_did = fte + _b[nj] 
##Storing the coefficients + the constant so we can observe the 'counterfactual


In [None]:
%%stata
clear
## preserve

## reg fte nj after njafter, robust

## collapse (mean) fte fte_did, by(nj after)

## save working_data_did2
use working_data_did2
twoway (connected fte after if nj ==1, color(blue)) (connected fte after if nj ==0, color(red)) ///
(connected fte_did after if nj ==0, color(red) lpattern(dash)) , ///
xline(0.5)   legend(label(1 NJ - Treatment) label(2 PA - Control) label(3 Counterfactual) ) 

##restore


In [None]:
%%stata

/// Difference-in-differences Exercise /// 
cd "$data"
use panel101.dta, clear //reference: slides by Torres-Reyna @ https://www.princeton.edu/~otorres/DID101.pdf


In [None]:
%%stata

tab year //from 1990 to 1999
tab country, nolabel //7 countries

In [None]:
%%stata

gen time = (year>=1994) & !missing(year) 
##generating before and after period, equivalent to a time fixed effect 

In [None]:
%%stata


gen treated = (country > 4) & !missing(country) 
## generating treatment units and non-treatment units, equivalent to a group fixed effect 


In [None]:
%%stata

gen did = time*treated 

## D_it = 1 if country ? 4 and year >= 1994

In [None]:
%%stata

reg y time treated did


In [None]:
%%stata

diff y, t(treated) p(time) 


In [None]:
%%stata

reg y time treated did
gen y_did = y + _b[treated]


In [None]:
%%stata
clear
cd "$working"
## preserve 

## collapse (mean) y y_did (max) love, by(treated time)

## save ex_working_did, replace
use ex_working_did

twoway (connected y time if treated ==1, color(blue)) (connected y time if treated ==0, color(red)) ///
(connected y_did time if treated ==0, color(red) lpattern(dash))  , ///
xline(0.5) legend(label(1 Treated) label(2  Control) label(3 Counterfactual))

##restore
