In [1]:
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
import numpy as np
from sklearn.linear_model import LinearRegression

## School FE

### No interactions

In [7]:
# School-level weighted least squares: dropout on learning mode and
# demographic shares, weighted by enrollment.
drop = pd.read_csv('final_data_all_state_dropout.csv').iloc[:, 1:]  # skip the first CSV column
drop['year'] = drop['year'].astype('str')  # string-typed so get_dummies expands it into year dummies

# One-hot encode every non-numeric column that is left after removing the
# district/county/state codes (year, plus presumably the school identifier that
# provides the fixed effects of this section -- confirm against the CSV schema).
# dtype=float keeps the design matrix numeric: pandas >= 2.0 returns bool
# dummies by default, which combine with the float regressors into an object
# array that statsmodels refuses to fit.
drop_dummy = pd.get_dummies(
    drop.drop(columns=['districtcode', 'countycode', 'state']),
    dtype=float,
)

# Columns kept out of the regressors: the outcome, the weight, and covariates
# not used in this specification.
excluded_cols = ['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']
model = sm.WLS(
    endog=drop_dummy['dropout'],
    exog=drop_dummy.drop(columns=excluded_cols),
    weights=drop_dummy['totalenrolled'],
)
wls = model.fit()

# Report only the substantive coefficients, not the dummy columns.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

### Race interactions

In [None]:
# School-level WLS adding a Black-share x learning-mode interaction.
drop = pd.read_csv('final_data_all_state_dropout.csv').iloc[:, 1:]  # skip the first CSV column
drop['year'] = drop['year'].astype('str')  # string-typed so get_dummies expands it into year dummies

# Both race interactions are built here; only 'black_remote' enters this model.
# 'his_remote' stays in drop_dummy so the following cell can use it.
drop['black_remote'] = drop['black'] * drop['schoolmode']
drop['his_remote'] = drop['hispanic'] * drop['schoolmode']

# dtype=float keeps the design matrix numeric: pandas >= 2.0 returns bool
# dummies by default, which combine with the float regressors into an object
# array that statsmodels refuses to fit.
drop_dummy = pd.get_dummies(
    drop.drop(columns=['districtcode', 'countycode', 'state']),
    dtype=float,
)

# Outcome, weight, unused covariates, and the interaction left out of this model.
excluded_cols = ['dropout', 'charter', 'virtualper', 'hybridper',
                 'totalenrolled', 'his_remote']
model = sm.WLS(
    endog=drop_dummy['dropout'],
    exog=drop_dummy.drop(columns=excluded_cols),
    weights=drop_dummy['totalenrolled'],
)
wls = model.fit()

# Report only the substantive coefficients, not the dummy columns.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome', 'black_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

In [None]:
# Same specification as the preceding cell with the interaction swapped:
# 'black_remote' is left out and 'his_remote' is kept.
# Relies on `drop_dummy` as built by the preceding cell.
excluded_cols = ['dropout', 'charter', 'virtualper', 'hybridper',
                 'totalenrolled', 'black_remote']

# Cast the regressors to float before fitting: if drop_dummy carries bool dummy
# columns (the pandas >= 2.0 get_dummies default), the mixed bool/float frame
# turns into an object array that statsmodels refuses to fit.
exog = drop_dummy.drop(columns=excluded_cols).astype(float)

model = sm.WLS(
    endog=drop_dummy['dropout'],
    exog=exog,
    weights=drop_dummy['totalenrolled'],
)
wls = model.fit()

# Report only the substantive coefficients, not the dummy columns.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome', 'his_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

### Income interactions

In [None]:
# School-level WLS adding a low-income-share x learning-mode interaction.
drop = pd.read_csv('final_data_all_state_dropout.csv').iloc[:, 1:]  # skip the first CSV column
drop['year'] = drop['year'].astype('str')  # string-typed so get_dummies expands it into year dummies
drop['income_remote'] = drop['lowincome'] * drop['schoolmode']

# dtype=float keeps the design matrix numeric: pandas >= 2.0 returns bool
# dummies by default, which combine with the float regressors into an object
# array that statsmodels refuses to fit.
drop_dummy = pd.get_dummies(
    drop.drop(columns=['districtcode', 'countycode', 'state']),
    dtype=float,
)

# Columns kept out of the regressors: the outcome, the weight, and covariates
# not used in this specification.
excluded_cols = ['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']
model = sm.WLS(
    endog=drop_dummy['dropout'],
    exog=drop_dummy.drop(columns=excluded_cols),
    weights=drop_dummy['totalenrolled'],
)
wls = model.fit()

# Report only the substantive coefficients, not the dummy columns.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome', 'income_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

### Charter interactions

In [None]:
# School-level WLS adding a charter x learning-mode interaction.
drop = pd.read_csv('final_data_all_state_dropout.csv').iloc[:, 1:]  # skip the first CSV column
drop['year'] = drop['year'].astype('str')  # string-typed so get_dummies expands it into year dummies
drop['charter_remote'] = drop['charter'] * drop['schoolmode']

# dtype=float keeps the design matrix numeric: pandas >= 2.0 returns bool
# dummies by default, which combine with the float regressors into an object
# array that statsmodels refuses to fit.
drop_dummy = pd.get_dummies(
    drop.drop(columns=['districtcode', 'countycode', 'state']),
    dtype=float,
)

# Unlike the other specifications, 'charter' is NOT excluded here: its main
# effect has to accompany the 'charter_remote' interaction.
excluded_cols = ['dropout', 'virtualper', 'hybridper', 'totalenrolled']
model = sm.WLS(
    endog=drop_dummy['dropout'],
    exog=drop_dummy.drop(columns=excluded_cols),
    weights=drop_dummy['totalenrolled'],
)
wls = model.fit()

# Report only the substantive coefficients, not the dummy columns.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome',
                      'charter', 'charter_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

### District-year interactions

In [None]:
# School-level WLS with district-by-year dummies added to the design matrix.
drop = pd.read_csv('final_data_all_state_dropout.csv').iloc[:, 1:]  # skip the first CSV column
drop['year'] = drop['year'].astype('str')  # string-typed so it can be concatenated and dummy-encoded

# District-year cell label, built by concatenating the two codes.
# NOTE(review): assumes 'districtcode' holds strings; a numeric column would
# make this addition fail -- confirm against the CSV schema.
drop['district_year'] = drop['districtcode'] + drop['year']

# The raw geography codes are removed; 'district_year' stays and is one-hot
# encoded together with the other non-numeric columns.
# dtype=float keeps the design matrix numeric: pandas >= 2.0 returns bool
# dummies by default, which combine with the float regressors into an object
# array that statsmodels refuses to fit.
drop_dummy = pd.get_dummies(
    drop.drop(columns=['districtcode', 'countycode', 'state']),
    dtype=float,
)

# Columns kept out of the regressors: the outcome, the weight, and covariates
# not used in this specification.
excluded_cols = ['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']
model = sm.WLS(
    endog=drop_dummy['dropout'],
    exog=drop_dummy.drop(columns=excluded_cols),
    weights=drop_dummy['totalenrolled'],
)
wls = model.fit()

# Report only the substantive coefficients, not the dummy columns.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

### County-year interactions

In [None]:
# School-level WLS with county-by-year dummies added to the design matrix.
drop = pd.read_csv('final_data_all_state_dropout.csv').iloc[:, 1:]  # skip the first CSV column
drop['year'] = drop['year'].astype('str')  # string-typed so it can be concatenated and dummy-encoded

# County-year cell label, built by concatenating the two codes.
drop['county_year'] = drop['countycode'] + drop['year']

# The raw geography codes are removed; 'county_year' stays and is one-hot
# encoded together with the other non-numeric columns.
# dtype=float keeps the design matrix numeric: pandas >= 2.0 returns bool
# dummies by default, which combine with the float regressors into an object
# array that statsmodels refuses to fit.
drop_dummy = pd.get_dummies(
    drop.drop(columns=['districtcode', 'countycode', 'state']),
    dtype=float,
)

# Columns kept out of the regressors: the outcome, the weight, and covariates
# not used in this specification.
excluded_cols = ['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']
model = sm.WLS(
    endog=drop_dummy['dropout'],
    exog=drop_dummy.drop(columns=excluded_cols),
    weights=drop_dummy['totalenrolled'],
)
wls = model.fit()

# Report only the substantive coefficients, not the dummy columns.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

### State-year interactions

In [None]:
# School-level WLS with state-by-year dummies added to the design matrix.
drop = pd.read_csv('final_data_all_state_dropout.csv').iloc[:, 1:]  # skip the first CSV column
drop['year'] = drop['year'].astype('str')  # string-typed so it can be concatenated and dummy-encoded

# State-year cell label, built by concatenating the two codes.
drop['state_year'] = drop['state'] + drop['year']

# The raw geography codes are removed; 'state_year' stays and is one-hot
# encoded together with the other non-numeric columns.
# dtype=float keeps the design matrix numeric: pandas >= 2.0 returns bool
# dummies by default, which combine with the float regressors into an object
# array that statsmodels refuses to fit.
drop_dummy = pd.get_dummies(
    drop.drop(columns=['districtcode', 'countycode', 'state']),
    dtype=float,
)

# Columns kept out of the regressors: the outcome, the weight, and covariates
# not used in this specification.
excluded_cols = ['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']
model = sm.WLS(
    endog=drop_dummy['dropout'],
    exog=drop_dummy.drop(columns=excluded_cols),
    weights=drop_dummy['totalenrolled'],
)
wls = model.fit()

# Report only the substantive coefficients, not the dummy columns.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

## District FE

### No interactions

In [18]:
# District-level WLS: school rows are collapsed to one row per district-year,
# then dropout is regressed on learning mode and demographics, weighted by the
# district's total enrollment.
drop = pd.read_csv('final_data_all_state_dropout.csv').iloc[:, 1:]  # skip the first CSV column
drop['year'] = drop['year'].astype('str')  # string-typed so get_dummies expands it into year dummies

by_district_year = drop.groupby(['districtcode', 'year'])

# Unweighted mean of every numeric column across the rows of a district-year.
# numeric_only=True is required on pandas >= 2.0, where averaging the string
# columns raises instead of silently dropping them.
drop_district = (
    by_district_year.mean(numeric_only=True)
    .reset_index()
    .drop(columns='totalenrolled')  # the mean enrollment is replaced by the sum below
)

# Enrollment is summed rather than averaged. Only that column is aggregated, so
# the text columns are not needlessly concatenated by sum().
total = (
    by_district_year['totalenrolled'].sum()
    .reset_index()
    .loc[:, ['districtcode', 'totalenrolled', 'year']]
)
drop_district = drop_district.merge(total, on=['districtcode', 'year'])

# One-hot encode the non-numeric columns (districtcode and year).
# dtype=float keeps the design matrix numeric: pandas >= 2.0 returns bool
# dummies by default, which combine with the float regressors into an object
# array that statsmodels refuses to fit.
drop_dummy = pd.get_dummies(drop_district, dtype=float)

# Columns kept out of the regressors: the outcome, the weight, and covariates
# not used in this specification.
excluded_cols = ['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']
model = sm.WLS(
    endog=drop_dummy['dropout'],
    exog=drop_dummy.drop(columns=excluded_cols),
    weights=drop_dummy['totalenrolled'],
)
wls = model.fit()

# Report only the substantive coefficients, not the dummy columns.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

### Race interactions

In [None]:
# District-level WLS adding a Black-share x learning-mode interaction.
# Relies on `drop_district` (one row per district-year) from the preceding cell.
# Both race interactions are built here; only 'black_remote' enters this model.
# 'his_remote' stays in drop_dummy so the following cell can use it.
drop_district['black_remote'] = drop_district['black'] * drop_district['schoolmode']
drop_district['his_remote'] = drop_district['hispanic'] * drop_district['schoolmode']

# dtype=float keeps the design matrix numeric: pandas >= 2.0 returns bool
# dummies by default, which combine with the float regressors into an object
# array that statsmodels refuses to fit.
drop_dummy = pd.get_dummies(drop_district, dtype=float)

# Outcome, weight, unused covariates, and the interaction left out of this model.
excluded_cols = ['dropout', 'charter', 'virtualper', 'hybridper',
                 'totalenrolled', 'his_remote']
model = sm.WLS(
    endog=drop_dummy['dropout'],
    exog=drop_dummy.drop(columns=excluded_cols),
    weights=drop_dummy['totalenrolled'],
)
wls = model.fit()

# Report only the substantive coefficients, not the dummy columns.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome', 'black_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

In [None]:
# Same district-level specification as the preceding cell with the interaction
# swapped: 'black_remote' is left out and 'his_remote' is kept.
# Relies on `drop_dummy` as built by the preceding cell.
excluded_cols = ['dropout', 'charter', 'virtualper', 'hybridper',
                 'totalenrolled', 'black_remote']

# Cast the regressors to float before fitting: if drop_dummy carries bool dummy
# columns (the pandas >= 2.0 get_dummies default), the mixed bool/float frame
# turns into an object array that statsmodels refuses to fit.
exog = drop_dummy.drop(columns=excluded_cols).astype(float)

model = sm.WLS(
    endog=drop_dummy['dropout'],
    exog=exog,
    weights=drop_dummy['totalenrolled'],
)
wls = model.fit()

# Report only the substantive coefficients, not the dummy columns.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome', 'his_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

### Income interactions

In [None]:
# District-level WLS adding a low-income-share x learning-mode interaction.
drop = pd.read_csv('final_data_all_state_dropout.csv').iloc[:, 1:]  # skip the first CSV column
drop['year'] = drop['year'].astype('str')  # string-typed so get_dummies expands it into year dummies

by_district_year = drop.groupby(['districtcode', 'year'])

# Unweighted mean of every numeric column across the rows of a district-year.
# numeric_only=True is required on pandas >= 2.0, where averaging the string
# columns raises instead of silently dropping them.
drop_district = (
    by_district_year.mean(numeric_only=True)
    .reset_index()
    .drop(columns='totalenrolled')  # the mean enrollment is replaced by the sum below
)

# Enrollment is summed rather than averaged. Only that column is aggregated, so
# the text columns are not needlessly concatenated by sum().
total = (
    by_district_year['totalenrolled'].sum()
    .reset_index()
    .loc[:, ['districtcode', 'totalenrolled', 'year']]
)
drop_district = drop_district.merge(total, on=['districtcode', 'year'])

# The interaction is formed from the district-year averages, not from
# school-level products.
drop_district['income_remote'] = drop_district['lowincome'] * drop_district['schoolmode']

# dtype=float keeps the design matrix numeric: pandas >= 2.0 returns bool
# dummies by default, which combine with the float regressors into an object
# array that statsmodels refuses to fit.
drop_dummy = pd.get_dummies(drop_district, dtype=float)

# Columns kept out of the regressors: the outcome, the weight, and covariates
# not used in this specification.
excluded_cols = ['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']
model = sm.WLS(
    endog=drop_dummy['dropout'],
    exog=drop_dummy.drop(columns=excluded_cols),
    weights=drop_dummy['totalenrolled'],
)
wls = model.fit()

# Report only the substantive coefficients, not the dummy columns.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome', 'income_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

### Charter interactions

In [None]:
# District-level WLS adding a charter x learning-mode interaction.
drop = pd.read_csv('final_data_all_state_dropout.csv').iloc[:, 1:]  # skip the first CSV column
drop['year'] = drop['year'].astype('str')  # string-typed so get_dummies expands it into year dummies

by_district_year = drop.groupby(['districtcode', 'year'])

# Unweighted mean of every numeric column across the rows of a district-year.
# numeric_only=True is required on pandas >= 2.0, where averaging the string
# columns raises instead of silently dropping them.
drop_district = (
    by_district_year.mean(numeric_only=True)
    .reset_index()
    .drop(columns='totalenrolled')  # the mean enrollment is replaced by the sum below
)

# Enrollment is summed rather than averaged. Only that column is aggregated, so
# the text columns are not needlessly concatenated by sum().
total = (
    by_district_year['totalenrolled'].sum()
    .reset_index()
    .loc[:, ['districtcode', 'totalenrolled', 'year']]
)
drop_district = drop_district.merge(total, on=['districtcode', 'year'])

# The interaction is formed from the district-year averages, so 'charter' here
# is the district-year mean of the school-level charter column.
drop_district['charter_remote'] = drop_district['charter'] * drop_district['schoolmode']

# dtype=float keeps the design matrix numeric: pandas >= 2.0 returns bool
# dummies by default, which combine with the float regressors into an object
# array that statsmodels refuses to fit.
drop_dummy = pd.get_dummies(drop_district, dtype=float)

# Unlike the other specifications, 'charter' is NOT excluded here: its main
# effect has to accompany the 'charter_remote' interaction.
excluded_cols = ['dropout', 'virtualper', 'hybridper', 'totalenrolled']
model = sm.WLS(
    endog=drop_dummy['dropout'],
    exog=drop_dummy.drop(columns=excluded_cols),
    weights=drop_dummy['totalenrolled'],
)
wls = model.fit()

# Report only the substantive coefficients, not the dummy columns.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome',
                      'charter', 'charter_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

### County-year interactions

In [27]:
# District-level WLS with county-by-year dummies added to the design matrix.
drop = pd.read_csv('final_data_all_state_dropout.csv').iloc[:, 1:]  # skip the first CSV column
drop['year'] = drop['year'].astype('str')  # string-typed so it can be concatenated and dummy-encoded

by_district_year = drop.groupby(['districtcode', 'year'])

# Unweighted mean of every numeric column across the rows of a district-year.
# numeric_only=True is required on pandas >= 2.0, where averaging the string
# columns raises instead of silently dropping them.
drop_district = (
    by_district_year.mean(numeric_only=True)
    .reset_index()
    .drop(columns='totalenrolled')  # the mean enrollment is replaced by the sum below
)

# Enrollment is summed rather than averaged. Only that column is aggregated, so
# the text columns are not needlessly concatenated by sum().
total = (
    by_district_year['totalenrolled'].sum()
    .reset_index()
    .loc[:, ['districtcode', 'totalenrolled', 'year']]
)

# The county code is not numeric, so it did not survive the averaging above;
# re-attach it from the school-level frame.
# NOTE(review): a district code that maps to more than one county would
# duplicate its district-year rows in this merge -- confirm the mapping is unique.
district_to_county = drop.loc[:, ['districtcode', 'countycode']].drop_duplicates()
drop_district = (
    drop_district
    .merge(total, on=['districtcode', 'year'])
    .merge(district_to_county, on='districtcode')
)
drop_district['county_year'] = drop_district['countycode'] + drop_district['year']

# 'countycode' itself is still in the frame here, so it is dummy-encoded
# alongside districtcode, year and county_year.
# dtype=float keeps the design matrix numeric: pandas >= 2.0 returns bool
# dummies by default, which combine with the float regressors into an object
# array that statsmodels refuses to fit.
drop_dummy = pd.get_dummies(drop_district, dtype=float)

# Columns kept out of the regressors: the outcome, the weight, and covariates
# not used in this specification.
excluded_cols = ['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']
model = sm.WLS(
    endog=drop_dummy['dropout'],
    exog=drop_dummy.drop(columns=excluded_cols),
    weights=drop_dummy['totalenrolled'],
)
wls = model.fit()

# Report only the substantive coefficients, not the dummy columns.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest

### State-year interactions

In [32]:
# District-level WLS with state-by-year dummies added to the design matrix.
drop = pd.read_csv('final_data_all_state_dropout.csv').iloc[:, 1:]  # skip the first CSV column
drop['year'] = drop['year'].astype('str')  # string-typed so it can be concatenated and dummy-encoded

by_district_year = drop.groupby(['districtcode', 'year'])

# Unweighted mean of every numeric column across the rows of a district-year.
# numeric_only=True is required on pandas >= 2.0, where averaging the string
# columns raises instead of silently dropping them.
drop_district = (
    by_district_year.mean(numeric_only=True)
    .reset_index()
    .drop(columns='totalenrolled')  # the mean enrollment is replaced by the sum below
)

# Enrollment is summed rather than averaged. Only that column is aggregated, so
# the text columns are not needlessly concatenated by sum().
total = (
    by_district_year['totalenrolled'].sum()
    .reset_index()
    .loc[:, ['districtcode', 'totalenrolled', 'year']]
)

# The state code is not numeric, so it did not survive the averaging above;
# re-attach it from the school-level frame.
# NOTE(review): a district code that appears under more than one state would
# duplicate its district-year rows in this merge -- confirm the mapping is unique.
district_to_state = drop.loc[:, ['districtcode', 'state']].drop_duplicates()
drop_district = (
    drop_district
    .merge(total, on=['districtcode', 'year'])
    .merge(district_to_state, on='districtcode')
)
drop_district['state_year'] = drop_district['state'] + drop_district['year']

# 'state' itself is still in the frame here, so it is dummy-encoded alongside
# districtcode, year and state_year.
# dtype=float keeps the design matrix numeric: pandas >= 2.0 returns bool
# dummies by default, which combine with the float regressors into an object
# array that statsmodels refuses to fit.
drop_dummy = pd.get_dummies(drop_district, dtype=float)

# Columns kept out of the regressors: the outcome, the weight, and covariates
# not used in this specification.
excluded_cols = ['dropout', 'charter', 'virtualper', 'hybridper', 'totalenrolled']
model = sm.WLS(
    endog=drop_dummy['dropout'],
    exog=drop_dummy.drop(columns=excluded_cols),
    weights=drop_dummy['totalenrolled'],
)
wls = model.fit()

# Report only the substantive coefficients, not the dummy columns.
summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black', 'hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest