In [1]:
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
import numpy as np
from sklearn.linear_model import LinearRegression

## School FE

### No interactions

In [2]:
# Baseline specification for the "School FE / No interactions" section:
# WLS of dropout on schoolmode, demographics and every dummy-encoded
# string column, weighted by totalenrolled.
drop = pd.read_csv('../final_data_all_state/final_data_all_state_dropout.csv').iloc[:, 1:]  # skips the first CSV column (presumably a saved index -- confirm)
drop['year'] = drop['year'].astype('str')  # as a string, year is expanded into indicator columns below

# dtype=float: pandas >= 2.0 emits bool dummies by default; mixed with the
# numeric regressors that yields an object-dtype design matrix, which
# statsmodels refuses to fit. On older pandas the fit is unchanged.
drop_dummy = pd.get_dummies(
    drop.drop(columns=['districtcode', 'countycode', 'state']),
    dtype=float,
)

# NOTE(review): the saved output carries a warning raised at
# np.log(self.weights) inside statsmodels -- presumably some totalenrolled
# values are zero; confirm and decide whether those rows belong in the fit.
model = sm.WLS(
    endog=drop_dummy.dropout,
    exog=drop_dummy.drop(columns=['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']),
    weights=drop_dummy.totalenrolled,
)
wls = model.fit()

# Show only the substantive coefficients; the dummy columns are omitted from the display.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,-0.324814,0.195976,-1.65742,0.09746154,-0.70896,0.059331
black,0.106752,0.021007,5.081718,3.798199e-07,0.065575,0.14793
hispanic,-0.028433,0.015462,-1.838891,0.06595667,-0.05874,0.001875
white,-0.02887,0.015617,-1.848598,0.06454118,-0.059482,0.001742
lowincome,0.006146,0.005219,1.177643,0.2389631,-0.004084,0.016377


### Race interactions

In [3]:
# Race interaction (Black x schoolmode): the baseline WLS plus black_remote.
# his_remote is built here too, but excluded from this fit.
drop = pd.read_csv('../final_data_all_state/final_data_all_state_dropout.csv').iloc[:, 1:]
drop['year'] = drop['year'].astype('str')  # as a string, year is expanded into indicator columns below
drop['black_remote'] = drop['black'] * drop['schoolmode']
drop['his_remote'] = drop['hispanic'] * drop['schoolmode']

# dtype=float: pandas >= 2.0 emits bool dummies by default; mixed with the
# numeric regressors that yields an object-dtype design matrix, which
# statsmodels refuses to fit. On older pandas the fit is unchanged.
drop_dummy = pd.get_dummies(
    drop.drop(columns=['districtcode', 'countycode', 'state']),
    dtype=float,
)

model = sm.WLS(
    endog=drop_dummy.dropout,
    exog=drop_dummy.drop(columns=['dropout', 'charter', 'virtualper', 'hybridper',
                                  'totalenrolled', 'his_remote']),
    weights=drop_dummy.totalenrolled,
)
wls = model.fit()

# Show only the substantive coefficients, including the interaction term.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome', 'black_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,0.090162,0.203426,0.443219,0.6576152,-0.308587,0.488911
black,0.089451,0.021089,4.241532,2.237106e-05,0.048112,0.13079
hispanic,-0.030578,0.015429,-1.981839,0.04752066,-0.060822,-0.000334
white,-0.02358,0.015598,-1.511757,0.1306227,-0.054154,0.006994
lowincome,0.007325,0.00521,1.406037,0.1597397,-0.002887,0.017537
black_remote,-0.02903,0.003927,-7.39233,1.541295e-13,-0.036728,-0.021333


In [4]:
# Race interaction (Hispanic x schoolmode). Reuses the drop_dummy frame that is
# already in the kernel (this cell does not rebuild it): his_remote stays in
# the regressors and black_remote is left out.
excluded_cols = ['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled', 'black_remote']
design = drop_dummy.drop(columns=excluded_cols)

model = sm.WLS(endog=drop_dummy.dropout, exog=design, weights=drop_dummy.totalenrolled)
wls = model.fit()

# Coefficient table restricted to the substantive terms.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome', 'his_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,-0.038267,0.266093,-0.143809,0.8856537,-0.559852,0.483319
black,0.108674,0.02104,5.165024,2.443683e-07,0.067432,0.149917
hispanic,-0.025294,0.015586,-1.622856,0.1046472,-0.055845,0.005257
white,-0.025512,0.015758,-1.618987,0.105477,-0.0564,0.005376
lowincome,0.006034,0.005219,1.156023,0.2476952,-0.004197,0.016264
his_remote,-0.004655,0.002924,-1.591839,0.1114479,-0.010386,0.001077


### Income interactions

In [5]:
# Income interaction (lowincome x schoolmode): the baseline WLS plus income_remote.
drop = pd.read_csv('../final_data_all_state/final_data_all_state_dropout.csv').iloc[:, 1:]
drop['year'] = drop['year'].astype('str')  # as a string, year is expanded into indicator columns below
drop['income_remote'] = drop['lowincome'] * drop['schoolmode']

# dtype=float: pandas >= 2.0 emits bool dummies by default; mixed with the
# numeric regressors that yields an object-dtype design matrix, which
# statsmodels refuses to fit. On older pandas the fit is unchanged.
drop_dummy = pd.get_dummies(
    drop.drop(columns=['districtcode', 'countycode', 'state']),
    dtype=float,
)

model = sm.WLS(
    endog=drop_dummy.dropout,
    exog=drop_dummy.drop(columns=['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']),
    weights=drop_dummy.totalenrolled,
)
wls = model.fit()

# Show only the substantive coefficients, including the interaction term.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome', 'income_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,2.251602,0.281389,8.001752,1.342354e-15,1.700033,2.803171
black,0.09787,0.020877,4.688028,2.789725e-06,0.056949,0.138792
hispanic,-0.016129,0.015388,-1.048149,0.2945915,-0.046291,0.014034
white,0.00856,0.01579,0.542149,0.5877264,-0.02239,0.039511
lowincome,0.015837,0.00524,3.022458,0.002512743,0.005566,0.026108
income_remote,-0.039102,0.003084,-12.679043,1.3461049999999999e-36,-0.045148,-0.033057


### Charter interactions

In [6]:
# Charter interaction (charter x schoolmode). Unlike the other school-level
# cells, 'charter' is NOT removed from the regressors here, so both the main
# effect and charter_remote are estimated.
drop = pd.read_csv('../final_data_all_state/final_data_all_state_dropout.csv').iloc[:, 1:]
drop['year'] = drop['year'].astype('str')  # as a string, year is expanded into indicator columns below
drop['charter_remote'] = drop['charter'] * drop['schoolmode']

# dtype=float: pandas >= 2.0 emits bool dummies by default; mixed with the
# numeric regressors that yields an object-dtype design matrix, which
# statsmodels refuses to fit. On older pandas the fit is unchanged.
drop_dummy = pd.get_dummies(
    drop.drop(columns=['districtcode', 'countycode', 'state']),
    dtype=float,
)

model = sm.WLS(
    endog=drop_dummy.dropout,
    exog=drop_dummy.drop(columns=['dropout', 'virtualper', 'hybridper', 'totalenrolled']),
    weights=drop_dummy.totalenrolled,
)
wls = model.fit()

# Show only the substantive coefficients, including charter and its interaction.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome', 'charter', 'charter_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,-0.270617,0.205579,-1.316368,0.1880765,-0.673586,0.132351
black,0.107436,0.021029,5.108884,3.291832e-07,0.066215,0.148657
hispanic,-0.028029,0.015471,-1.811699,0.07005838,-0.058356,0.002297
white,-0.028523,0.015625,-1.825456,0.06795781,-0.05915,0.002105
lowincome,0.006027,0.005221,1.154224,0.2484318,-0.004208,0.016262
charter,0.338036,0.564414,0.598914,0.549242,-0.768311,1.444382
charter_remote,-0.196708,0.300507,-0.654587,0.5127467,-0.785751,0.392336


### District-year interactions

In [7]:
# District-by-year effects: district_year concatenates districtcode and the
# string year, and is then dummy-encoded along with the other string columns.
drop = pd.read_csv('../final_data_all_state/final_data_all_state_dropout.csv').iloc[:, 1:]
drop['year'] = drop['year'].astype('str')  # must be a string for the concatenation below
drop['district_year'] = drop['districtcode'] + drop['year']

# districtcode itself is dropped; only the combined district_year label is encoded.
# dtype=float: pandas >= 2.0 emits bool dummies by default; mixed with the
# numeric regressors that yields an object-dtype design matrix, which
# statsmodels refuses to fit. On older pandas the fit is unchanged.
drop_dummy = pd.get_dummies(
    drop.drop(columns=['districtcode', 'countycode', 'state']),
    dtype=float,
)

model = sm.WLS(
    endog=drop_dummy.dropout,
    exog=drop_dummy.drop(columns=['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']),
    weights=drop_dummy.totalenrolled,
)
wls = model.fit()

# Show only the substantive coefficients; the dummy columns are omitted from the display.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,-9.337088,1.799421,-5.188939,2.185735e-07,-12.864614,-5.809562
black,0.01592,0.030936,0.514597,0.6068539,-0.044726,0.076565
hispanic,-0.050666,0.024161,-2.097042,0.03603242,-0.09803,-0.003302
white,-0.032797,0.031745,-1.033145,0.3015783,-0.095029,0.029435
lowincome,0.029034,0.010259,2.830024,0.004670211,0.008922,0.049146


### County-year interactions

In [8]:
# County-by-year effects: county_year concatenates countycode and the string
# year, and is then dummy-encoded along with the other string columns.
drop = pd.read_csv('../final_data_all_state/final_data_all_state_dropout.csv').iloc[:, 1:]
drop['year'] = drop['year'].astype('str')  # must be a string for the concatenation below
drop['county_year'] = drop['countycode'] + drop['year']

# countycode itself is dropped; only the combined county_year label is encoded.
# dtype=float: pandas >= 2.0 emits bool dummies by default; mixed with the
# numeric regressors that yields an object-dtype design matrix, which
# statsmodels refuses to fit. On older pandas the fit is unchanged.
drop_dummy = pd.get_dummies(
    drop.drop(columns=['districtcode', 'countycode', 'state']),
    dtype=float,
)

model = sm.WLS(
    endog=drop_dummy.dropout,
    exog=drop_dummy.drop(columns=['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']),
    weights=drop_dummy.totalenrolled,
)
wls = model.fit()

# Show only the substantive coefficients; the dummy columns are omitted from the display.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,-1.319089,0.433932,-3.039854,0.002373,-2.169681,-0.468498
black,0.028625,0.022196,1.289632,0.197208,-0.014884,0.072133
hispanic,-0.054703,0.016548,-3.30567,0.000951,-0.087141,-0.022265
white,-0.036571,0.017305,-2.113381,0.034592,-0.070491,-0.002651
lowincome,0.020269,0.005533,3.663256,0.00025,0.009423,0.031115


### State-year interactions

In [9]:
# State-by-year effects: state_year concatenates state and the string year,
# and is then dummy-encoded along with the other string columns.
drop = pd.read_csv('../final_data_all_state/final_data_all_state_dropout.csv').iloc[:, 1:]
drop['year'] = drop['year'].astype('str')  # must be a string for the concatenation below
drop['state_year'] = drop['state'] + drop['year']

# state itself is dropped; only the combined state_year label is encoded.
# dtype=float: pandas >= 2.0 emits bool dummies by default; mixed with the
# numeric regressors that yields an object-dtype design matrix, which
# statsmodels refuses to fit. On older pandas the fit is unchanged.
drop_dummy = pd.get_dummies(
    drop.drop(columns=['districtcode', 'countycode', 'state']),
    dtype=float,
)

model = sm.WLS(
    endog=drop_dummy.dropout,
    exog=drop_dummy.drop(columns=['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']),
    weights=drop_dummy.totalenrolled,
)
wls = model.fit()

# Show only the substantive coefficients; the dummy columns are omitted from the display.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,-2.432059,0.267143,-9.103944,1.0120719999999999e-19,-2.955704,-1.908413
black,0.040836,0.020544,1.98776,0.04686155,0.000567,0.081105
hispanic,-0.038678,0.014997,-2.578962,0.009921894,-0.068075,-0.00928
white,-0.025044,0.015136,-1.654603,0.09803187,-0.054712,0.004625
lowincome,0.02015,0.00512,3.935637,8.345773e-05,0.010114,0.030186


## District FE

### No interactions

In [10]:
# District-level baseline: collapse the file to one row per (districtcode, year),
# then fit the same WLS with districtcode and year dummy-encoded.
drop = pd.read_csv('../final_data_all_state/final_data_all_state_dropout.csv').iloc[:, 1:]  # skips the first CSV column (presumably a saved index -- confirm)
drop['year'] = drop['year'].astype('str')  # as a string, year is dummy-encoded rather than averaged

district_keys = ['districtcode', 'year']

# Unweighted mean of every numeric column within each district-year.
# numeric_only=True: pandas >= 2.0 raises on non-numeric columns instead of
# silently dropping them as older versions did; this restores the old result.
drop_district = (
    drop.groupby(district_keys)
    .mean(numeric_only=True)
    .reset_index()
    .drop(columns='totalenrolled')  # the averaged enrollment is replaced by the district total below
)

# Sum only totalenrolled; summing every column would also try to add up the string columns.
total = (
    drop.groupby(district_keys)['totalenrolled']
    .sum()
    .reset_index()
    .loc[:, ['districtcode', 'totalenrolled', 'year']]
)
drop_district = drop_district.merge(total, on=district_keys)

# dtype=float: pandas >= 2.0 emits bool dummies by default; mixed with the
# numeric regressors that yields an object-dtype design matrix, which
# statsmodels refuses to fit. On older pandas the fit is unchanged.
drop_dummy = pd.get_dummies(drop_district, dtype=float)

# NOTE(review): the saved output carries a warning raised at
# np.log(self.weights) inside statsmodels -- presumably some district totals
# are zero; confirm and decide whether those rows belong in the fit.
model = sm.WLS(
    endog=drop_dummy.dropout,
    exog=drop_dummy.drop(columns=['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']),
    weights=drop_dummy.totalenrolled,
)
wls = model.fit()

# Show only the substantive coefficients; the dummy columns are omitted from the display.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,-1.157046,0.196005,-5.90314,3.768969e-09,-1.541289,-0.772802
black,0.171681,0.026871,6.389008,1.799943e-10,0.119003,0.224359
hispanic,-0.043917,0.019459,-2.256829,0.02405593,-0.082064,-0.005769
white,-0.086189,0.017457,-4.937128,8.150595e-07,-0.120413,-0.051966
lowincome,-0.009113,0.006159,-1.479678,0.1390136,-0.021187,0.002961


### Race interactions

In [11]:
# District-level race interaction (Black x schoolmode). Works on the
# drop_district frame already in the kernel (this cell does not rebuild it)
# and adds both interaction columns to it in place.
drop_district['black_remote'] = drop_district['black'] * drop_district['schoolmode']
drop_district['his_remote'] = drop_district['hispanic'] * drop_district['schoolmode']

# dtype=float: pandas >= 2.0 emits bool dummies by default; mixed with the
# numeric regressors that yields an object-dtype design matrix, which
# statsmodels refuses to fit. On older pandas the fit is unchanged.
drop_dummy = pd.get_dummies(drop_district, dtype=float)

# his_remote is excluded so that only the Black interaction enters this fit.
model = sm.WLS(
    endog=drop_dummy.dropout,
    exog=drop_dummy.drop(columns=['dropout', 'charter', 'virtualper', 'hybridper',
                                  'totalenrolled', 'his_remote']),
    weights=drop_dummy.totalenrolled,
)
wls = model.fit()

# Show only the substantive coefficients, including the interaction term.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome', 'black_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,-0.262299,0.20609,-1.272744,0.2031602,-0.666313,0.141714
black,0.10453,0.027047,3.86471,0.0001124388,0.051507,0.157553
hispanic,-0.057372,0.019231,-2.983353,0.002862991,-0.095071,-0.019673
white,-0.074892,0.017249,-4.341883,1.436779e-05,-0.108707,-0.041078
lowincome,-0.007853,0.006078,-1.291993,0.1964112,-0.019768,0.004062
black_remote,-0.053457,0.004254,-12.566689,9.403763e-36,-0.061796,-0.045118


In [12]:
# District-level race interaction (Hispanic x schoolmode). Reuses the
# drop_dummy frame already in the kernel (this cell does not rebuild it):
# his_remote stays in the regressors and black_remote is left out.
excluded_cols = ['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled', 'black_remote']
design = drop_dummy.drop(columns=excluded_cols)

model = sm.WLS(endog=drop_dummy.dropout, exog=design, weights=drop_dummy.totalenrolled)
wls = model.fit()

# Coefficient table restricted to the substantive terms.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome', 'his_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,-0.655462,0.289551,-2.263722,0.0236281,-1.22309,-0.087835
black,0.176696,0.026945,6.557595,5.942567e-11,0.123873,0.229519
hispanic,-0.038474,0.019589,-1.964054,0.04957165,-0.076875,-7.2e-05
white,-0.079421,0.017686,-4.490554,7.241812e-06,-0.114092,-0.044749
lowincome,-0.008974,0.006157,-1.457642,0.1449935,-0.021044,0.003095
his_remote,-0.008063,0.003427,-2.35271,0.01867046,-0.014781,-0.001345


### Income interactions

In [13]:
# District-level income interaction: collapse to one row per (districtcode, year),
# add income_remote from the district means, then fit the WLS.
drop = pd.read_csv('../final_data_all_state/final_data_all_state_dropout.csv').iloc[:, 1:]
drop['year'] = drop['year'].astype('str')  # as a string, year is dummy-encoded rather than averaged

district_keys = ['districtcode', 'year']

# Unweighted mean of every numeric column within each district-year.
# numeric_only=True: pandas >= 2.0 raises on non-numeric columns instead of
# silently dropping them as older versions did; this restores the old result.
drop_district = (
    drop.groupby(district_keys)
    .mean(numeric_only=True)
    .reset_index()
    .drop(columns='totalenrolled')  # the averaged enrollment is replaced by the district total below
)

# Sum only totalenrolled; summing every column would also try to add up the string columns.
total = (
    drop.groupby(district_keys)['totalenrolled']
    .sum()
    .reset_index()
    .loc[:, ['districtcode', 'totalenrolled', 'year']]
)
drop_district = drop_district.merge(total, on=district_keys)

# The interaction is the product of the district-year means, not the mean of row-level products.
drop_district['income_remote'] = drop_district['lowincome'] * drop_district['schoolmode']

# dtype=float: pandas >= 2.0 emits bool dummies by default; mixed with the
# numeric regressors that yields an object-dtype design matrix, which
# statsmodels refuses to fit. On older pandas the fit is unchanged.
drop_dummy = pd.get_dummies(drop_district, dtype=float)

model = sm.WLS(
    endog=drop_dummy.dropout,
    exog=drop_dummy.drop(columns=['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']),
    weights=drop_dummy.totalenrolled,
)
wls = model.fit()

# Show only the substantive coefficients, including the interaction term.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome', 'income_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,2.504774,0.300043,8.348051,8.597267000000001e-17,1.916577,3.09297
black,0.120755,0.026502,4.556531,5.307415e-06,0.068802,0.172708
hispanic,-0.043965,0.019051,-2.307777,0.02104667,-0.081311,-0.006618
white,-0.036786,0.017372,-2.117599,0.03425163,-0.070841,-0.002731
lowincome,0.004966,0.006094,0.814821,0.4152084,-0.006981,0.016913
income_remote,-0.053235,0.003353,-15.875224,1.384369e-55,-0.059809,-0.046661


### Charter interactions

In [14]:
# District-level charter interaction: collapse to one row per (districtcode, year),
# add charter_remote from the district means, then fit the WLS with 'charter'
# kept among the regressors.
drop = pd.read_csv('../final_data_all_state/final_data_all_state_dropout.csv').iloc[:, 1:]
drop['year'] = drop['year'].astype('str')  # as a string, year is dummy-encoded rather than averaged

district_keys = ['districtcode', 'year']

# Unweighted mean of every numeric column within each district-year.
# numeric_only=True: pandas >= 2.0 raises on non-numeric columns instead of
# silently dropping them as older versions did; this restores the old result.
drop_district = (
    drop.groupby(district_keys)
    .mean(numeric_only=True)
    .reset_index()
    .drop(columns='totalenrolled')  # the averaged enrollment is replaced by the district total below
)

# Sum only totalenrolled; summing every column would also try to add up the string columns.
total = (
    drop.groupby(district_keys)['totalenrolled']
    .sum()
    .reset_index()
    .loc[:, ['districtcode', 'totalenrolled', 'year']]
)
drop_district = drop_district.merge(total, on=district_keys)

# 'charter' here is the district-year mean of the row-level charter column,
# so the interaction is a product of two district means.
drop_district['charter_remote'] = drop_district['charter'] * drop_district['schoolmode']

# dtype=float: pandas >= 2.0 emits bool dummies by default; mixed with the
# numeric regressors that yields an object-dtype design matrix, which
# statsmodels refuses to fit. On older pandas the fit is unchanged.
drop_dummy = pd.get_dummies(drop_district, dtype=float)

model = sm.WLS(
    endog=drop_dummy.dropout,
    exog=drop_dummy.drop(columns=['dropout', 'virtualper', 'hybridper', 'totalenrolled']),
    weights=drop_dummy.totalenrolled,
)
wls = model.fit()

# Show only the substantive coefficients, including charter and its interaction.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome', 'charter', 'charter_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,-0.582654,0.219291,-2.656986,0.007905928,-1.012548,-0.152761
black,0.179116,0.026814,6.680039,2.612262e-11,0.126551,0.23168
hispanic,-0.049075,0.019432,-2.525429,0.01158221,-0.08717,-0.01098
white,-0.081443,0.017419,-4.675496,2.999035e-06,-0.115591,-0.047295
lowincome,-0.010287,0.006143,-1.674584,0.09406988,-0.022329,0.001756
charter,0.23583,0.555941,0.424199,0.6714366,-0.854023,1.325682
charter_remote,-2.867563,0.459822,-6.236244,4.799296e-10,-3.768986,-1.966139


### County-year interactions

In [15]:
# District-level fit with county-by-year effects: collapse to one row per
# (districtcode, year), re-attach each district's countycode, and encode the
# concatenated county_year label.
drop = pd.read_csv('../final_data_all_state/final_data_all_state_dropout.csv').iloc[:, 1:]
drop['year'] = drop['year'].astype('str')  # must be a string for the concatenation below

district_keys = ['districtcode', 'year']

# Unweighted mean of every numeric column within each district-year.
# numeric_only=True: pandas >= 2.0 raises on non-numeric columns instead of
# silently dropping them as older versions did; this restores the old result.
drop_district = (
    drop.groupby(district_keys)
    .mean(numeric_only=True)
    .reset_index()
    .drop(columns='totalenrolled')  # the averaged enrollment is replaced by the district total below
)

# Sum only totalenrolled; summing every column would also try to add up the string columns.
total = (
    drop.groupby(district_keys)['totalenrolled']
    .sum()
    .reset_index()
    .loc[:, ['districtcode', 'totalenrolled', 'year']]
)

# NOTE(review): if any districtcode appears with more than one countycode,
# this merge duplicates that district's rows -- confirm the mapping is one-to-one.
district_county = drop.loc[:, ['districtcode', 'countycode']].drop_duplicates()
drop_district = (
    drop_district
    .merge(total, on=district_keys)
    .merge(district_county, on='districtcode')
)
drop_district['county_year'] = drop_district['countycode'] + drop_district['year']

# countycode is still a column of drop_district, so it is dummy-encoded
# alongside districtcode, year and county_year.
# dtype=float: pandas >= 2.0 emits bool dummies by default; mixed with the
# numeric regressors that yields an object-dtype design matrix, which
# statsmodels refuses to fit. On older pandas the fit is unchanged.
drop_dummy = pd.get_dummies(drop_district, dtype=float)

model = sm.WLS(
    endog=drop_dummy.dropout,
    exog=drop_dummy.drop(columns=['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']),
    weights=drop_dummy.totalenrolled,
)
wls = model.fit()

# Show only the substantive coefficients; the dummy columns are omitted from the display.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,-0.76284,0.426726,-1.787656,0.073902,-1.599443,0.073764
black,0.057455,0.031485,1.824851,0.068093,-0.004271,0.119181
hispanic,-0.10215,0.022179,-4.60565,4e-06,-0.145633,-0.058667
white,-0.086013,0.020048,-4.290291,1.8e-05,-0.125319,-0.046708
lowincome,0.009421,0.00648,1.453819,0.14607,-0.003284,0.022126


### State-year interactions

In [16]:
# District-level fit with state-by-year effects: collapse to one row per
# (districtcode, year), re-attach each district's state, and encode the
# concatenated state_year label.
drop = pd.read_csv('../final_data_all_state/final_data_all_state_dropout.csv').iloc[:, 1:]
drop['year'] = drop['year'].astype('str')  # must be a string for the concatenation below

district_keys = ['districtcode', 'year']

# Unweighted mean of every numeric column within each district-year.
# numeric_only=True: pandas >= 2.0 raises on non-numeric columns instead of
# silently dropping them as older versions did; this restores the old result.
drop_district = (
    drop.groupby(district_keys)
    .mean(numeric_only=True)
    .reset_index()
    .drop(columns='totalenrolled')  # the averaged enrollment is replaced by the district total below
)

# Sum only totalenrolled; summing every column would also try to add up the string columns.
total = (
    drop.groupby(district_keys)['totalenrolled']
    .sum()
    .reset_index()
    .loc[:, ['districtcode', 'totalenrolled', 'year']]
)

# NOTE(review): if any districtcode appears with more than one state value,
# this merge duplicates that district's rows -- confirm the mapping is one-to-one.
district_state = drop.loc[:, ['districtcode', 'state']].drop_duplicates()
drop_district = (
    drop_district
    .merge(total, on=district_keys)
    .merge(district_state, on='districtcode')
)
drop_district['state_year'] = drop_district['state'] + drop_district['year']

# state is still a column of drop_district, so it is dummy-encoded
# alongside districtcode, year and state_year.
# dtype=float: pandas >= 2.0 emits bool dummies by default; mixed with the
# numeric regressors that yields an object-dtype design matrix, which
# statsmodels refuses to fit. On older pandas the fit is unchanged.
drop_dummy = pd.get_dummies(drop_district, dtype=float)

model = sm.WLS(
    endog=drop_dummy.dropout,
    exog=drop_dummy.drop(columns=['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']),
    weights=drop_dummy.totalenrolled,
)
wls = model.fit()

# Show only the substantive coefficients; the dummy columns are omitted from the display.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

  llf += 0.5 * np.sum(np.log(self.weights))


Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,-3.582219,0.260924,-13.728991,3.115003e-42,-4.093727,-3.070711
black,0.048813,0.025821,1.890464,0.0587459,-0.001805,0.099432
hispanic,-0.050837,0.018408,-2.761726,0.005767845,-0.086923,-0.014751
white,-0.082333,0.016436,-5.009263,5.626354e-07,-0.114553,-0.050112
lowincome,0.00817,0.005894,1.38615,0.1657547,-0.003385,0.019725
