# Higher Education DataHub: Online Education Data
## Use Case for Arnold
### By Charlie Eaton and Laura Hamilton, UC Merced
### September 4, 2020, code available at 

## Download datasets from IPEDS, read into Stata, and restructure

In [28]:
* Download the IPEDS files for 2008-2018 and save three Stata files per year:
*   ef<year>a - enrollment by race/gender, collapsed to one row per unitid
*   hd<year>a - directory file
*   ef<year>c - enrollment by state of residence, reduced to the line==99 row per unitid
quietly {
    * Race/gender suffixes of the IPEDS "ef" enrollment variables used below
    local groups totlt totlm totlw aiant aianm aianw asiat asiam ///
        asiaw bkaat bkaam bkaaw hispt hispm hispw whitt whitm whitw ///
        unknt unknm unknw 2mort 2morm 2morw nralt nralm nralw nhpit ///
        nhpim nhpiw
    * Prefixes of the enrollment variables derived from each suffix
    local levels ftfirst ptfirst allfirst ptunder ftunder

    forvalues year=2008/2018 {

        **ENROLLMENT BY RACE/GENDER (EF-A)**
        copy https://nces.ed.gov/ipeds/datacenter/data/EF`year'A_Data_Stata.zip EF`year'A_Data_Stata.zip, replace
        unzipfile EF`year'A_Data_Stata, replace
        insheet using ef`year'a_data_stata.csv, clear
        drop x*

        foreach group of local groups {
            * Each derived variable is filled only on the row that matches its
            * line/efalevel code and is missing on every other row.
            * NOTE(review): the prefixes suggest ft/pt = full-/part-time and
            * first/under = first-time/undergraduate - confirm the codes against
            * the IPEDS data dictionary.
            gen ftfirstef`group'=ef`group' if line == 1
            gen ptfirstef`group'=ef`group' if line == 15
            gen allfirstef`group'=ef`group' if efalevel==4
            gen ptunderef`group'=ef`group' if line == 22
            gen ftunderef`group'=ef`group' if line == 8

            **FLAG MISSING VALUES SO THAT COLLAPSE (SUM) DOES NOT TURN THEM INTO ZEROS**
            * The flag is 1 on rows where the value is missing. After collapse (min)
            * it stays 1 only when every row of the institution was missing.
            foreach level of local levels {
                gen miss`level'ef`group' = missing(`level'ef`group')
            }
        }

        **COLLAPSE ENROLLMENT VARIABLES TO A SINGLE ROW FOR ALL INSTITUTIONS**
        collapse (min) miss* (sum) ft* pt* all*, by(unitid)

        * collapse (sum) returns 0 for an all-missing cell; restore the missing value
        foreach group of local groups {
            foreach level of local levels {
                replace `level'ef`group'=. if miss`level'ef`group'
            }
        }
        gen year=`year'
        drop miss*
        save ef`year'a, replace

        **DIRECTORY FILE (HD)**
        copy https://nces.ed.gov/ipeds/datacenter/data/HD`year'_Data_Stata.zip HD`year'_Data_Stata.zip, replace
        unzipfile HD`year'_Data_Stata, replace
        insheet using hd`year'_data_stata.csv, clear
        * Harmonize storage types so the yearly files can be appended later
        destring ein gentele, replace
        tostring opeid, replace
        gen year=`year'
        save hd`year'a, replace

        **ENROLLMENT BY STATE OF RESIDENCE (EF-C)**
        copy https://nces.ed.gov/ipeds/datacenter/data/EF`year'C_Data_Stata.zip EF`year'C_Data_Stata.zip, replace
        unzipfile EF`year'C_Data_Stata, replace
        insheet using ef`year'c_data_stata.csv, clear
        * The line==99 row supplies the institution total
        gen total_frosh_res = efres01 if line==99
        * Blank out rows with line above 89 so they cannot be picked as the maximum
        * NOTE(review): confirm that no aggregate rows (for example national or
        * outlying-area totals) carry a line code of 89 or below; otherwise
        * topfroshstate would pick up a total rather than a single state.
        replace efres01=. if line>89
        bysort unitid: egen topfroshstate=max(efres01)
        keep if line==99
        gen year=`year'
        save ef`year'c, replace
    }
}

## Merge data sets
* "hd" are directory files
* "ef-year-a" files are enrollments by race/gender
* "ef-year-c" files are enrollments by state of student residence

In [113]:
* Stack the yearly directory files, attach the two enrollment files to each year,
* keep the control==3 institutions, and build undergraduate totals and shares.
quietly {
    set more off

    * Race/gender suffixes of the enrollment variables
    local groups totlt totlm totlw aiant aianm aianw asiat asiam ///
        asiaw bkaat bkaam bkaaw hispt hispm hispw whitt whitm whitw ///
        unknt unknm unknw 2mort 2morm 2morw nralt nralm nralw nhpit ///
        nhpim nhpiw

    use hd2008a, clear
    merge 1:1 unitid year using ef2008a, nogen
    merge 1:1 unitid year using ef2008c, nogen

    forvalues year=2009/2018 {
        append using hd`year'a
        * The appended rows have missing enrollment values; the update option
        * lets the using file fill them in without touching earlier years.
        merge 1:1 unitid year using ef`year'a, nogen update
        merge 1:1 unitid year using ef`year'c, nogen keepusing(total_frosh_res topfroshstate) update
    }

    * The miss* helper flags are already dropped before the ef<year>a files are
    * saved, so a bare drop stops with "variable miss* not found" when none exist.
    * capture keeps the cleanup but makes it a no-op in that case.
    capture drop miss*

    keep if control==3

    * NOTE(review): shifts every year label forward by one (2008 becomes 2009);
    * presumably to label the academic year by its ending year - confirm.
    replace year=year+1

    * Total undergraduates = full-time + part-time; the missing option keeps the
    * total missing when both components are missing instead of returning 0.
    foreach group of local groups {
        egen tunderef`group'=rowtotal(ftunderef`group' ptunderef`group'), missing
    }

    gen pctblackunder=tunderefbkaat / tundereftotlt
    gen pctlatinunder=tunderefhispt / tundereftotlt
}

## Create online indicator
* First, flag schools that have "online" in their name.
* Second, flag schools where fewer than 33% of first-time students come from the single most-represented state (Deming, Goldin, & Katz 2013).

In [115]:
* Flag an institution-year as online when the institution name contains "online"
* or when the largest single-state count is less than .333333 of the total.
quietly {
    replace instnm=lower(instnm)
    gen online=regexm(instnm, "online")

    * Fill gaps in the residence variables from the adjacent year of the same
    * institution: first from the following year, then from the preceding one.
    * Sorting by year within unitid is made explicit because the [_n+1] and
    * [_n-1] references are only meaningful when rows are in year order.
    foreach v in topfroshstate total_frosh_res {
        bysort unitid (year): replace `v'=`v'[_n+1] if `v'==.
        bysort unitid (year): replace `v'=`v'[_n-1] if `v'==.
    }

    * A missing ratio compares as larger than any number, so institution-years
    * without residence data keep the name-based flag.
    replace online=1 if topfroshstate/total_frosh_res<.333333
}

## Number of for-profit colleges by online status

In [103]:
* Number of rows with a non-missing undergraduate total, by year and online flag
table year online, contents(n tundereftotlt)



------------------------
          |    online   
     year |     0      1
----------+-------------
     2009 | 2,952     68
     2010 | 3,104     72
     2011 | 3,296     79
     2012 | 3,477     82
     2013 | 3,587     83
     2014 | 3,613     74
     2015 | 3,571     70
     2016 | 3,453     79
     2017 | 3,317     79
     2018 | 3,005     88
     2019 | 2,707     86
------------------------


## Total undergrad enrollments by online status

In [117]:
* Sum of undergraduate enrollment, by year and online flag
table year online, contents(sum tundereftotlt) format(%10.0fc)


----------------------------------
          |         online        
     year |          0           1
----------+-----------------------
     2009 |  1,092,912     527,701
     2010 |  1,375,350     664,506
     2011 |  1,510,362     693,069
     2012 |  1,450,197     673,127
     2013 |  1,301,659     644,668
     2014 |  1,239,261     535,823
     2015 |  1,160,651     507,837
     2016 |    956,623     453,956
     2017 |    817,332     404,357
     2018 |    739,805     394,369
     2019 |    659,111     361,301
----------------------------------


## Mean institution-level share of undergraduates who are Black, by online status

In [116]:
* Unweighted mean across institutions of the Black share of undergraduates
table year online, contents(mean pctblackunder) format(%4.2f)


----------------------
          |   online  
     year |    0     1
----------+-----------
     2009 | 0.20  0.11
     2010 | 0.21  0.15
     2011 | 0.23  0.17
     2012 | 0.24  0.18
     2013 | 0.23  0.18
     2014 | 0.24  0.21
     2015 | 0.24  0.21
     2016 | 0.24  0.20
     2017 | 0.24  0.19
     2018 | 0.25  0.21
     2019 | 0.24  0.21
----------------------
