In [1]:
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
import numpy as np
from sklearn.linear_model import LinearRegression

## School FE

### No interactions

In [2]:
math = pd.read_csv('../final_data_all_state/final_data_all_state_mathpass.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math_dummy = pd.get_dummies(math.drop(columns = ['districtcode', 'countycode', 'state']))

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested']), 
           weights = math_dummy.totaltested)

wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black','hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,7.937898,0.391939,20.252901,2.593722e-90,7.169667,8.70613
black,-0.408005,0.040136,-10.16567,3.223494e-24,-0.486673,-0.329336
hispanic,-0.222579,0.031738,-7.01303,2.405123e-12,-0.284787,-0.16037
white,-0.029814,0.031323,-0.951817,0.3412013,-0.091209,0.031582
lowincome,-0.046789,0.007487,-6.248986,4.213159e-10,-0.061465,-0.032113


### Race interactions

In [3]:
math = pd.read_csv('../final_data_all_state/final_data_all_state_mathpass.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math['black_remote'] = math['black'] * math['schoolmode']
math['his_remote'] = math['hispanic'] * math['schoolmode']
math_dummy = pd.get_dummies(math.drop(columns = ['districtcode', 'countycode', 'state']))

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested', 'his_remote']), 
           weights = math_dummy.totaltested)

wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black','hispanic', 'white', 'lowincome', 'black_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,7.95782,0.419194,18.983628,1.135158e-79,7.136166,8.779474
black,-0.408031,0.040137,-10.165957,3.21408e-24,-0.486702,-0.329359
hispanic,-0.22255,0.031739,-7.011789,2.426516e-12,-0.284761,-0.160338
white,-0.029792,0.031324,-0.951086,0.3415721,-0.09119,0.031606
lowincome,-0.04682,0.007491,-6.249978,4.18655e-10,-0.061504,-0.032137
black_remote,-0.002344,0.017495,-0.134006,0.8933989,-0.036636,0.031947


In [4]:
model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested', 'black_remote']), 
           weights = math_dummy.totaltested)

wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black','hispanic', 'white', 'lowincome', 'his_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,18.412059,0.669265,27.510878,1.262156e-163,17.100247,19.723872
black,-0.352483,0.039882,-8.83807,1.05137e-18,-0.430656,-0.27431
hispanic,-0.170359,0.031572,-5.395894,6.894407e-08,-0.232243,-0.108475
white,0.043342,0.031276,1.385783,0.1658283,-0.017962,0.104645
lowincome,-0.038754,0.007432,-5.214095,1.865202e-07,-0.053322,-0.024185
his_remote,-0.173142,0.009009,-19.218582,1.364783e-81,-0.190801,-0.155484


### Income interactions

In [5]:
math = pd.read_csv('../final_data_all_state/final_data_all_state_mathpass.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math['income_remote'] = math['lowincome'] * math['schoolmode']
math_dummy = pd.get_dummies(math.drop(columns = ['districtcode', 'countycode', 'state']))

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested']), 
           weights = math_dummy.totaltested)

wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black','hispanic', 'white', 'lowincome', 'income_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,9.768332,0.738238,13.231962,8.374503e-40,8.321327,11.215337
black,-0.400468,0.040211,-9.959252,2.5979100000000002e-23,-0.479284,-0.321652
hispanic,-0.213983,0.031868,-6.714742,1.933627e-11,-0.276447,-0.15152
white,-0.018082,0.031573,-0.572703,0.5668519,-0.079967,0.043803
lowincome,-0.045407,0.007501,-6.053493,1.442178e-09,-0.06011,-0.030705
income_remote,-0.031661,0.010822,-2.925659,0.003441067,-0.052873,-0.010449


### Charter interactions

In [6]:
math = pd.read_csv('../final_data_all_state/final_data_all_state_mathpass.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math['charter_remote'] = math['charter'] * math['schoolmode']
math_dummy = pd.get_dummies(math.drop(columns = ['districtcode', 'countycode', 'state']))

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested']), 
           weights = math_dummy.totaltested)

wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black','hispanic', 'white', 'lowincome', 'charter_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,8.317686,0.402616,20.659079,7.408491e-94,7.528525,9.106846
black,-0.406033,0.040123,-10.119724,5.1477669999999996e-24,-0.484677,-0.327389
hispanic,-0.221294,0.031727,-6.974917,3.154268e-12,-0.283482,-0.159106
white,-0.029439,0.031311,-0.94023,0.3471106,-0.090811,0.031932
lowincome,-0.048086,0.007491,-6.418884,1.403028e-10,-0.062769,-0.033402
charter_remote,-4.04922,0.988894,-4.094694,4.243566e-05,-5.987532,-2.110907


### District-year interactions

In [7]:
math = pd.read_csv('../final_data_all_state/final_data_all_state_mathpass.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math['district_year'] = math['districtcode'] + math['year']
math_dummy = pd.get_dummies(math.drop(columns = ['districtcode', 'countycode', 'state']))

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested']), 
           weights = math_dummy.totaltested)

wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black','hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,2.787333,1.224992,2.275389,0.02290005,0.386152,5.188514
black,-0.492969,0.041134,-11.984315,6.595312e-33,-0.573599,-0.412338
hispanic,-0.356298,0.033685,-10.577233,4.937854e-26,-0.422327,-0.290269
white,-0.055144,0.034402,-1.60294,0.108974,-0.122577,0.012289
lowincome,-0.032914,0.013255,-2.483065,0.01303919,-0.058896,-0.006931


### County-year interactions

In [8]:
math = pd.read_csv('../final_data_all_state/final_data_all_state_mathpass.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math['county_year'] = math['countycode'] + math['year']
math_dummy = pd.get_dummies(math.drop(columns = ['districtcode', 'countycode', 'state']))

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested']), 
           weights = math_dummy.totaltested)

wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black','hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,-2.196939,0.650347,-3.378103,0.0007313767,-3.471679,-0.922199
black,-0.468591,0.035379,-13.244927,7.320484e-40,-0.537936,-0.399245
hispanic,-0.328953,0.028489,-11.546754,9.762135000000001e-31,-0.384794,-0.273113
white,-0.064818,0.028412,-2.281341,0.02253953,-0.120508,-0.009127
lowincome,-0.018612,0.007058,-2.636812,0.008375897,-0.032447,-0.004777


### State-year interactions

In [9]:
math = pd.read_csv('../final_data_all_state/final_data_all_state_mathpass.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math['state_year'] = math['state'] + math['year']
math_dummy = pd.get_dummies(math.drop(columns = ['districtcode', 'countycode', 'state']))

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter', 
                                                                              'virtualper','hybridper', 
                                                                              'totaltested']), 
           weights = math_dummy.totaltested)

wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black','hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,-8.948009,0.50386,-17.758927,4.960194e-70,-9.935615,-7.960403
black,-0.466955,0.036098,-12.935927,3.9882399999999996e-38,-0.537709,-0.396201
hispanic,-0.271946,0.028523,-9.534385,1.671921e-21,-0.327853,-0.21604
white,-0.005329,0.028151,-0.189285,0.8498712,-0.060507,0.04985
lowincome,-0.019455,0.006743,-2.885262,0.003914978,-0.032672,-0.006238


## District FE

### No interactions

In [10]:
math = pd.read_csv('../final_data_all_state/final_data_all_state_mathpass.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math_district = math.groupby(['districtcode', 'year']).mean().reset_index().drop(columns = 'totaltested')
total = math.groupby(['districtcode', 'year']).sum().reset_index().loc[:, ['districtcode', 
                                                                           'totaltested', 'year']]
math_district = math_district.merge(total, on = ['districtcode', 'year'])
math_dummy = pd.get_dummies(math_district)

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter',
                                                                              'virtualper',
                                                                    'hybridper', 'totaltested']), 
           weights = math_dummy.totaltested)

wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black','hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,10.795976,0.489167,22.070138,6.194671e-105,9.837087,11.754866
black,-0.346888,0.071411,-4.857653,1.209634e-06,-0.486871,-0.206905
hispanic,-0.103763,0.051354,-2.02054,0.04335956,-0.20443,-0.003096
white,-0.133205,0.049578,-2.686758,0.007229315,-0.230391,-0.036019
lowincome,-0.05084,0.009948,-5.110813,3.279429e-07,-0.07034,-0.031341


### Race interactions

In [11]:
math_district['black_remote'] = math_district['black'] * math_district['schoolmode']
math_district['his_remote'] = math_district['hispanic'] * math_district['schoolmode']
math_dummy = pd.get_dummies(math_district)

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter',
                                                                              'virtualper','hybridper', 
                                                                              'totaltested', 'his_remote']), 
           weights = math_dummy.totaltested)

wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black','hispanic', 'white', 'lowincome', 'black_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,10.438197,0.531542,19.637593,5.873489e-84,9.396243,11.480152
black,-0.342655,0.071445,-4.796094,1.646135e-06,-0.482705,-0.202606
hispanic,-0.103036,0.05135,-2.006554,0.04482964,-0.203694,-0.002378
white,-0.132673,0.049573,-2.676283,0.007459032,-0.229849,-0.035496
lowincome,-0.050124,0.009955,-5.035001,4.879195e-07,-0.069639,-0.03061
black_remote,0.040799,0.023731,1.719201,0.08561521,-0.00572,0.087318


In [12]:
model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter',
                                                                              'virtualper','hybridper', 
                                                                              'totaltested', 'black_remote']), 
           weights = math_dummy.totaltested)

wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black','hispanic', 'white', 'lowincome', 'his_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,24.837288,0.885806,28.039181,2.379219e-165,23.100885,26.57369
black,-0.223308,0.070238,-3.179283,0.001481834,-0.360993,-0.085623
hispanic,-0.013692,0.050517,-0.271044,0.7863641,-0.112718,0.085334
white,-0.001257,0.049054,-0.02562,0.9795612,-0.097414,0.094901
lowincome,-0.038784,0.009763,-3.972766,7.1645e-05,-0.057922,-0.019647
his_remote,-0.231096,0.012263,-18.844789,1.340656e-77,-0.255135,-0.207058


### Income interactions

In [13]:
math = pd.read_csv('../final_data_all_state/final_data_all_state_mathpass.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math_district = math.groupby(['districtcode', 'year']).mean().reset_index().drop(columns = 'totaltested')
total = math.groupby(['districtcode', 'year']).sum().reset_index().loc[:, ['districtcode', 
                                                                           'totaltested', 'year']]
math_district = math_district.merge(total, on = ['districtcode', 'year'])
math_district['income_remote'] = math_district['lowincome'] * math_district['schoolmode']
math_dummy = pd.get_dummies(math_district)

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter',
                                                                              'virtualper','hybridper', 
                                                                              'totaltested']), 
           weights = math_dummy.totaltested)

wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black','hispanic', 'white', 'lowincome', 'income_remote']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,11.061093,0.999171,11.070269,2.759608e-28,9.102468,13.019719
black,-0.345715,0.071519,-4.833922,1.362781e-06,-0.485909,-0.205521
hispanic,-0.10227,0.051591,-1.982331,0.0474752,-0.203401,-0.001139
white,-0.131164,0.050032,-2.621586,0.008768158,-0.229241,-0.033088
lowincome,-0.050666,0.009965,-5.084533,3.766038e-07,-0.070199,-0.031133
income_remote,-0.004486,0.014741,-0.304303,0.7609044,-0.033381,0.02441


### Charter interactions

In [14]:
math = pd.read_csv('../final_data_all_state/final_data_all_state_mathpass.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math_district = math.groupby(['districtcode', 'year']).mean().reset_index().drop(columns = 'totaltested')
total = math.groupby(['districtcode', 'year']).sum().reset_index().loc[:, ['districtcode', 
                                                                           'totaltested', 'year']]
math_district = math_district.merge(total, on = ['districtcode', 'year'])
math_district['charter_remote'] = math_district['charter'] * math_district['schoolmode']
math_dummy = pd.get_dummies(math_district)

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass',
                                                                              'virtualper','hybridper', 
                                                                              'totaltested']), 
           weights = math_dummy.totaltested)

wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black','hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,10.928965,0.520767,20.986276,2.566686e-95,9.908131,11.949799
black,-0.346532,0.071414,-4.852419,1.241922e-06,-0.486521,-0.206542
hispanic,-0.103164,0.051362,-2.008571,0.04461504,-0.203846,-0.002482
white,-0.132554,0.049587,-2.673144,0.007529133,-0.229758,-0.035351
lowincome,-0.05124,0.009962,-5.143347,2.760683e-07,-0.070768,-0.031711


### County-year interactions

In [15]:
math = pd.read_csv('../final_data_all_state/final_data_all_state_mathpass.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math_district = math.groupby(['districtcode', 'year']).mean().reset_index().drop(columns = 'totaltested')
total = math.groupby(['districtcode', 'year']).sum().reset_index().loc[:, ['districtcode', 
                                                                           'totaltested', 'year']]
math_district = math_district.merge(total, on = ['districtcode', 'year']).merge(
    math.loc[:, ['districtcode', 'countycode']].drop_duplicates(), on = 'districtcode')
math_district['county_year'] = math_district['countycode'] + math_district['year']
math_dummy = pd.get_dummies(math_district)

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter',
                                                                              'virtualper','hybridper', 
                                                                              'totaltested']), 
           weights = math_dummy.totaltested)

wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black','hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,-2.874783,0.860696,-3.340067,0.0008423142,-4.562032,-1.187534
black,-0.352211,0.06046,-5.825512,5.966873e-09,-0.470732,-0.233689
hispanic,-0.247261,0.045224,-5.467475,4.735252e-08,-0.335915,-0.158607
white,-0.120407,0.044152,-2.727072,0.006407112,-0.20696,-0.033853
lowincome,-0.022026,0.008542,-2.578405,0.009947632,-0.038771,-0.00528


### State-year interactions

In [16]:
math = pd.read_csv('../final_data_all_state/final_data_all_state_mathpass.csv').iloc[:, 1:]
math['year'] = math['year'].astype('str')
math_district = math.groupby(['districtcode', 'year']).mean().reset_index().drop(columns = 'totaltested')
total = math.groupby(['districtcode', 'year']).sum().reset_index().loc[:, ['districtcode', 
                                                                           'totaltested', 'year']]
math_district = math_district.merge(total, on = ['districtcode', 'year']).merge(
    math.loc[:, ['districtcode', 'state']].drop_duplicates(), on = 'districtcode')
math_district['state_year'] = math_district['state'] + math_district['year']
math_dummy = pd.get_dummies(math_district)

model = sm.WLS(endog = math_dummy.mathpass, exog = math_dummy.drop(columns = ['mathpass', 'charter',
                                                                              'virtualper','hybridper', 
                                                                              'totaltested']), 
           weights = math_dummy.totaltested)

wls = model.fit()

summary_df = wls.summary2().tables[1]
coeffs_of_interest = ['schoolmode', 'black','hispanic', 'white', 'lowincome']
summary_of_interest = summary_df.loc[coeffs_of_interest]
summary_of_interest 

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
schoolmode,-11.728941,0.638618,-18.366118,7.108553e-74,-12.980793,-10.477089
black,-0.438713,0.060863,-7.208166,6.169926e-13,-0.55802,-0.319405
hispanic,-0.205592,0.043535,-4.722458,2.36845e-06,-0.290931,-0.120252
white,-0.094273,0.04201,-2.244072,0.0248541,-0.176623,-0.011923
lowincome,-0.018809,0.008445,-2.227334,0.02595174,-0.035362,-0.002255
