# Merges QCEW, FRED, and US Census into Single Dataset
This is the final set of the data cleaning

In [1]:
%%capture
# Capture suppresses output to screen
import stata_setup
stata_setup.config("/Applications/Stata 17", "be")

In [2]:
%%stata
global data "Data"
global output "Output"
global processed "$data/Processed"

global finalQCEWData "$processed/QCEW/MergedQCEW.dta"
global finalFredData "$processed/FRED/MergedFred.dta"
global finalCensusData "$processed/US_Census/MergedCensus.dta"

global finalSynthData "$processed/finalSynth.dta"


. global data "Data"

. global output "Output"

. global processed "$data/Processed"

. 
. global finalQCEWData "$processed/QCEW/MergedQCEW.dta"

. global finalFredData "$processed/FRED/MergedFred.dta"

. global finalCensusData "$processed/US_Census/MergedCensus.dta"

. 
. global finalSynthData "$processed/finalSynth.dta"

. 


In [3]:
%%capture
%%stata
log using "$output/SynthMerge.log", replace

## Read in QCEW data

In [4]:
%%stata
use "$finalQCEWData", clear
describe


. use "$finalQCEWData", clear

. describe

Contains data from Data/Processed/QCEW/MergedQCEW.dta
 Observations:           680                  
    Variables:             6                  5 Feb 2022 16:28
-------------------------------------------------------------------------------
Variable      Storage   Display    Value
    name         type    format    label      Variable label
-------------------------------------------------------------------------------
year            int     %10.0g                Year
employment      long    %10.0g                Annual Reported Employment (NAICS
                                                3364)
state           str2    %9s                   
annualPay       long    %10.0g                Average Annual Pay (USD) (NAICS
                                                3364)
lnEmployment    float   %9.0g                 
lnAnnualPay     float   %9.0g                 
------------------------------------------------------------------------

## Merge QCEW and FRED data

In [5]:
%%stata
merge 1:1 state year using $finalFredData


(variable year was int, now float to accommodate using data's values)

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                               680  (_merge==3)
    -----------------------------------------


## Merge QCEW/FRED with US Census data

In [6]:
%%stata
drop _merge
merge 1:1 state year using $finalCensusData


. drop _merge

. merge 1:1 state year using $finalCensusData

    Result                      Number of obs
    -----------------------------------------
    Not matched                           268
        from master                        34  (_merge==1)
        from using                        234  (_merge==2)

    Matched                               646  (_merge==3)
    -----------------------------------------

. 


Year 2020 will be dropped from the data set due to the  
2020-2021 COVID-19 pandemic.

In [7]:
%%stata
list state year annualPay employment if _merge==1


     +------------------------------------+
     | state   year   annual~y   employ~t |
     |------------------------------------|
 20. |    AL   2020      93622      12820 |
 40. |    AR   2020      62146       4192 |
 60. |    AZ   2020     110968      30502 |
 80. |    CA   2020     113819      75955 |
100. |    CO   2020     143902       8593 |
     |------------------------------------|
120. |    CT   2020     117382      31263 |
140. |    FL   2020     100174      26940 |
160. |    GA   2020      95015      19484 |
180. |    IL   2020      66578       3310 |
200. |    IN   2020      94367       6076 |
     |------------------------------------|
220. |    KS   2020      78936      25793 |
240. |    KY   2020      75035       3169 |
260. |    MA   2020     150953      11028 |
280. |    MD   2020      98354       3407 |
300. |    MI   2020      77141       5171 |
     |------------------------------------|
320. |    MO   2020     105995      17814 |
340. |    MS   2020      79860 

In [8]:
%%stata
drop if year==2020

/* QCEW dataset begins at 2001 */
drop if year==2000


. drop if year==2020
(34 observations deleted)

. 
. /* QCEW dataset begins at 2001 */
. drop if year==2000
(36 observations deleted)

. 


In [9]:
%%stata
list state year annualPay employment pop if _merge==2


     +----------------------------------------------+
     | state   year   annual~y   employ~t       pop |
     |----------------------------------------------|
647. |    AK   2010          .          .    713910 |
648. |    AK   2011          .          .    722128 |
649. |    AK   2012          .          .    730443 |
650. |    AK   2013          .          .    737068 |
651. |    AK   2014          .          .    736283 |
     |----------------------------------------------|
652. |    AK   2015          .          .    737498 |
653. |    AK   2016          .          .    741456 |
654. |    AK   2017          .          .    739700 |
655. |    AK   2018          .          .    735139 |
656. |    AK   2019          .          .    731545 |
     |----------------------------------------------|
657. |    DC   2010          .          .    605226 |
658. |    DC   2011          .          .    619800 |
659. |    DC   2012          .          .    634924 |
660. |    DC   2013        

Observations with _merge==2 do not have significant NAICS 3364 industries.  
US population data for 2010-2019 contained all states and
DC and Puerto Rico.  These will be dropped from the dataset

In [10]:
%%stata
drop if _merge==2
drop _merge


. drop if _merge==2
(198 observations deleted)

. drop _merge

. 


## Merge in FIPS codes

In [11]:
%%stata
merge m:1 state using "$processed/StateCodes.dta", keepusing(fips)


    Result                      Number of obs
    -----------------------------------------
    Not matched                            18
        from master                         0  (_merge==1)
        from using                         18  (_merge==2)

    Matched                               646  (_merge==3)
    -----------------------------------------


In [12]:
%%stata
list state if _merge==2


     +-------+
     | state |
     |-------|
647. |    AK |
648. |    DC |
649. |    DE |
650. |    HI |
651. |    IA |
     |-------|
652. |    ID |
653. |    LA |
654. |    ME |
655. |    MN |
656. |    MT |
     |-------|
657. |    ND |
658. |    NE |
659. |    NV |
660. |    PR |
661. |    RI |
     |-------|
662. |    SD |
663. |    VT |
664. |    WY |
     +-------+


In [13]:
%%stata
/* These states have no significant NAIC 3364 industry */
drop if _merge==2
drop _merge


. /* These states have no significant NAIC 3364 industry */
. drop if _merge==2
(18 observations deleted)

. drop _merge

. 


## Generate Dummies for DID Analysis

### Add indicator dummies for incentive year, leads and lags
Tax incentive was passed in 2014 in WA.  In 2014 WA was selected as the expansion site.

In [14]:
%%stata
sort state year
by state: generate postIncentive=(year>=2014)
label variable postIncentive "=1 if year >=2014"


. sort state year

. by state: generate postIncentive=(year>=2014)

. label variable postIncentive "=1 if year >=2014"

. 


In [15]:
%%stata
// T0 to examine the pre and post trends
by state: generate t_0 = (postIncentive==1 & postIncentive[_n-1]==0) 

// Generate 4 years of lags and leads
foreach i of numlist -4/4 {
    if `i' < 0 {
        local var "lag"
    }
    if `i' > 0 {
        local var "lead"
    }
    
    // So the lag/lead numbers are always positive
    local j: display int(abs(`i'))
    
    /* 
        Note: A negative value of i will result
            in lag.
    */
    by state: generate t_`var'`j'=(t_0[_n+`i']==1)
}
// Group all lags older than 4 year together
by state: replace t_lag4=1 if (t_lag4[_n-1]==1)


. // T0 to examine the pre and post trends
. by state: generate t_0 = (postIncentive==1 & postIncentive[_n-1]==0) 

. 
. // Generate 4 years of lags and leads
. foreach i of numlist -4/4 {
  2.     if `i' < 0 {
  3.         local var "lag"
  4.     }
  5.     if `i' > 0 {
  6.         local var "lead"
  7.     }
  8.     
.     // So the lag/lead numbers are always positive
.     local j: display int(abs(`i'))
  9.     
.     /* 
>         Note: A negative value of i will result
>             in lag.
>     */
.     by state: generate t_`var'`j'=(t_0[_n+`i']==1)
 10. }

. // Group all lags older than 4 year together
. by state: replace t_lag4=1 if (t_lag4[_n-1]==1)
(34 real changes made)

. 


In [16]:
%%stata
generate wa=(state=="WA")
label variable wa "=1 if the state is WA"

generate treated = postIncentive*wa
label variable treated "=1 if after 2014 and in WA"


. generate wa=(state=="WA")

. label variable wa "=1 if the state is WA"

. 
. generate treated = postIncentive*wa

. label variable treated "=1 if after 2014 and in WA"

. 


## Declare as Panel Dataset

In [17]:
%%stata
sort fips year
xtset fips year


. sort fips year

. xtset fips year

Panel variable: fips (strongly balanced)
 Time variable: year, 2001 to 2019
         Delta: 1 unit

. 


## Save Dataset for Analysis

In [18]:
%%stata
save "$finalSynthData", replace


file Data/Processed/finalSynth.dta saved


In [19]:
%%stata
clear
log close


. clear

. log close
      name:  <unnamed>
       log:  /Users/alwashere/Documents/School Work/University of Maryland/MS A
> pplied Economics/2022 1Q Winter/ECON 672 Program Evaluation/ResearchPaper/Out
> put/SynthMerge.log
  log type:  text
 closed on:   5 Feb 2022, 19:04:52
-------------------------------------------------------------------------------

. 
