In [1]:
import pandas as pd
from linearmodels.panel import PanelOLS

## School FE

### Base regression - no interactions

In [3]:
data = pd.read_csv('final_data_all_state_mathpass.csv').iloc[:, 1:].set_index(['schoolcode', 'year'])

outcome_var = 'mathpass'
exog_vars = ['schoolmode', 'black', 'hispanic', 'lowincome', 'white']

model = PanelOLS(dependent = data[outcome_var], exog = data.loc[:, exog_vars], 
                 weights = data['totaltested'], entity_effects = True, time_effects = True)

results = model.fit(cov_type = 'clustered', clusters = data['districtcode'])
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:               mathpass   R-squared:                        0.0323
Estimator:                   PanelOLS   R-squared (Between):             -0.7190
No. Observations:               24734   R-squared (Within):              -0.0678
Date:                Thu, Sep 21 2023   R-squared (Overall):             -0.6955
Time:                        20:50:50   Log-likelihood                -8.718e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      136.04
Entities:                        4348   P-value                           0.0000
Avg Obs:                       5.6886   Distribution:                 F(5,20376)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             22.509
                            

In [4]:
data = pd.read_csv('final_data_all_state_mathpass.csv').iloc[:, 1:].set_index(['schoolcode', 'year'])

outcome_var = 'mathpass'
exog_vars = ['virtualper', 'hybridper', 'black', 'hispanic', 'lowincome', 'white']

model = PanelOLS(dependent = data[outcome_var], exog = data.loc[:, exog_vars], 
                 weights = data['totaltested'], entity_effects = True, time_effects = True)

results = model.fit(cov_type = 'clustered', clusters = data['districtcode'])
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:               mathpass   R-squared:                        0.0453
Estimator:                   PanelOLS   R-squared (Between):             -0.6614
No. Observations:               24734   R-squared (Within):              -0.0913
Date:                Thu, Sep 21 2023   R-squared (Overall):             -0.6408
Time:                        20:51:40   Log-likelihood                -8.702e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      161.12
Entities:                        4348   P-value                           0.0000
Avg Obs:                       5.6886   Distribution:                 F(6,20375)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             28.516
                            

### Race interaction

In [5]:
data = pd.read_csv('final_data_all_state_mathpass.csv').iloc[:, 1:]
data['black_mode'] = data['black'] * data['schoolmode']
data = data.set_index(['schoolcode', 'year'])

outcome_var = 'mathpass'
exog_vars = ['schoolmode', 'black_mode', 'black', 'hispanic', 'lowincome', 'white']

model = PanelOLS(dependent = data[outcome_var], exog = data.loc[:, exog_vars], 
                 weights = data['totaltested'], entity_effects = True, time_effects = True)

results = model.fit(cov_type = 'clustered', clusters = data['districtcode'])
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:               mathpass   R-squared:                        0.0323
Estimator:                   PanelOLS   R-squared (Between):             -0.7190
No. Observations:               24734   R-squared (Within):              -0.0678
Date:                Thu, Sep 21 2023   R-squared (Overall):             -0.6955
Time:                        20:53:16   Log-likelihood                -8.718e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      113.36
Entities:                        4348   P-value                           0.0000
Avg Obs:                       5.6886   Distribution:                 F(6,20375)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             19.288
                            

In [10]:
data = pd.read_csv('final_data_all_state_mathpass.csv').iloc[:, 1:]
data['black_virtual'] = data['black'] * data['virtualper']
data['black_hybrid'] = data['black'] * data['hybridper']
data = data.set_index(['schoolcode', 'year'])

outcome_var = 'mathpass'
exog_vars = ['virtualper', 'hybridper', 'black_virtual', 'black_hybrid', 
             'black', 'hispanic', 'lowincome', 'white']

model = PanelOLS(dependent = data[outcome_var], exog = data.loc[:, exog_vars], 
                 weights = data['totaltested'], entity_effects = True, time_effects = True)

results = model.fit(cov_type = 'clustered', clusters = data['districtcode'])
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:               mathpass   R-squared:                        0.0453
Estimator:                   PanelOLS   R-squared (Between):             -0.6612
No. Observations:               24734   R-squared (Within):              -0.0912
Date:                Thu, Sep 21 2023   R-squared (Overall):             -0.6406
Time:                        21:03:55   Log-likelihood                -8.702e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      120.84
Entities:                        4348   P-value                           0.0000
Avg Obs:                       5.6886   Distribution:                 F(8,20373)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             21.657
                            

In [6]:
data = pd.read_csv('final_data_all_state_mathpass.csv').iloc[:, 1:]
data['his_mode'] = data['hispanic'] * data['schoolmode']
data = data.set_index(['schoolcode', 'year'])

outcome_var = 'mathpass'
exog_vars = ['schoolmode', 'his_mode', 'black', 'hispanic', 'lowincome', 'white']

model = PanelOLS(dependent = data[outcome_var], exog = data.loc[:, exog_vars], 
                 weights = data['totaltested'], entity_effects = True, time_effects = True)

results = model.fit(cov_type = 'clustered', clusters = data['districtcode'])
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:               mathpass   R-squared:                        0.0495
Estimator:                   PanelOLS   R-squared (Between):             -0.4434
No. Observations:               24734   R-squared (Within):              -0.0707
Date:                Thu, Sep 21 2023   R-squared (Overall):             -0.4299
Time:                        20:57:35   Log-likelihood                -8.696e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      176.97
Entities:                        4348   P-value                           0.0000
Avg Obs:                       5.6886   Distribution:                 F(6,20375)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             26.215
                            

In [11]:
data = pd.read_csv('final_data_all_state_mathpass.csv').iloc[:, 1:]
data['his_virtual'] = data['hispanic'] * data['virtualper']
data['his_hybrid'] = data['hispanic'] * data['hybridper']
data = data.set_index(['schoolcode', 'year'])

outcome_var = 'mathpass'
exog_vars = ['virtualper', 'hybridper', 'his_virtual', 'his_hybrid', 
             'black', 'hispanic', 'lowincome', 'white']

model = PanelOLS(dependent = data[outcome_var], exog = data.loc[:, exog_vars], 
                 weights = data['totaltested'], entity_effects = True, time_effects = True)

results = model.fit(cov_type = 'clustered', clusters = data['districtcode'])
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:               mathpass   R-squared:                        0.0664
Estimator:                   PanelOLS   R-squared (Between):             -0.4658
No. Observations:               24734   R-squared (Within):              -0.0724
Date:                Thu, Sep 21 2023   R-squared (Overall):             -0.4516
Time:                        21:04:34   Log-likelihood                -8.674e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      181.25
Entities:                        4348   P-value                           0.0000
Avg Obs:                       5.6886   Distribution:                 F(8,20373)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             22.775
                            

In [7]:
data = pd.read_csv('final_data_all_state_mathpass.csv').iloc[:, 1:]
data['bh_mode'] = (data['black'] + data['hispanic']) * data['schoolmode']
data = data.set_index(['schoolcode', 'year'])

outcome_var = 'mathpass'
exog_vars = ['schoolmode', 'bh_mode', 'black', 'hispanic', 'lowincome', 'white']

model = PanelOLS(dependent = data[outcome_var], exog = data.loc[:, exog_vars], 
                 weights = data['totaltested'], entity_effects = True, time_effects = True)

results = model.fit(cov_type = 'clustered', clusters = data['districtcode'])
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:               mathpass   R-squared:                        0.0513
Estimator:                   PanelOLS   R-squared (Between):             -0.4170
No. Observations:               24734   R-squared (Within):              -0.0697
Date:                Thu, Sep 21 2023   R-squared (Overall):             -0.4045
Time:                        20:58:34   Log-likelihood                -8.694e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      183.48
Entities:                        4348   P-value                           0.0000
Avg Obs:                       5.6886   Distribution:                 F(6,20375)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             29.686
                            

In [13]:
data = pd.read_csv('final_data_all_state_mathpass.csv').iloc[:, 1:]
data['bh_virtual'] = (data['black'] + data['hispanic']) * data['virtualper']
data['bh_hybrid'] = (data['black'] + data['hispanic']) * data['hybridper']
data = data.set_index(['schoolcode', 'year'])

outcome_var = 'mathpass'
exog_vars = ['virtualper', 'hybridper', 'bh_virtual', 'bh_hybrid', 
             'black', 'hispanic', 'lowincome', 'white']

model = PanelOLS(dependent = data[outcome_var], exog = data.loc[:, exog_vars], 
                 weights = data['totaltested'], entity_effects = True, time_effects = True)

results = model.fit(cov_type = 'clustered', clusters = data['districtcode'])
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:               mathpass   R-squared:                        0.0683
Estimator:                   PanelOLS   R-squared (Between):             -0.4302
No. Observations:               24734   R-squared (Within):              -0.0706
Date:                Thu, Sep 21 2023   R-squared (Overall):             -0.4172
Time:                        21:06:08   Log-likelihood                -8.671e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      186.73
Entities:                        4348   P-value                           0.0000
Avg Obs:                       5.6886   Distribution:                 F(8,20373)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             25.287
                            

### Income interaction

In [8]:
data = pd.read_csv('final_data_all_state_mathpass.csv').iloc[:, 1:]
data['income_mode'] = data['lowincome'] * data['schoolmode']
data = data.set_index(['schoolcode', 'year'])

outcome_var = 'mathpass'
exog_vars = ['schoolmode', 'income_mode', 'black', 'hispanic', 'lowincome', 'white']

model = PanelOLS(dependent = data[outcome_var], exog = data.loc[:, exog_vars], 
                 weights = data['totaltested'], entity_effects = True, time_effects = True)

results = model.fit(cov_type = 'clustered', clusters = data['districtcode'])
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:               mathpass   R-squared:                        0.0327
Estimator:                   PanelOLS   R-squared (Between):             -0.6728
No. Observations:               24734   R-squared (Within):              -0.0688
Date:                Thu, Sep 21 2023   R-squared (Overall):             -0.6510
Time:                        21:00:05   Log-likelihood                -8.718e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      114.83
Entities:                        4348   P-value                           0.0000
Avg Obs:                       5.6886   Distribution:                 F(6,20375)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             19.233
                            

In [14]:
data = pd.read_csv('final_data_all_state_mathpass.csv').iloc[:, 1:]
data['income_virtual'] = data['lowincome'] * data['virtualper']
data['income_hybrid'] = data['lowincome'] * data['hybridper']
data = data.set_index(['schoolcode', 'year'])

outcome_var = 'mathpass'
exog_vars = ['virtualper', 'hybridper', 'income_virtual', 'income_hybrid',
             'black', 'hispanic', 'lowincome', 'white']

model = PanelOLS(dependent = data[outcome_var], exog = data.loc[:, exog_vars], 
                 weights = data['totaltested'], entity_effects = True, time_effects = True)

results = model.fit(cov_type = 'clustered', clusters = data['districtcode'])
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:               mathpass   R-squared:                        0.0478
Estimator:                   PanelOLS   R-squared (Between):             -0.6483
No. Observations:               24734   R-squared (Within):              -0.0880
Date:                Thu, Sep 21 2023   R-squared (Overall):             -0.6280
Time:                        21:08:01   Log-likelihood                -8.698e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      127.70
Entities:                        4348   P-value                           0.0000
Avg Obs:                       5.6886   Distribution:                 F(8,20373)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             22.361
                            

### Charter interaction

In [9]:
data = pd.read_csv('final_data_all_state_mathpass.csv').iloc[:, 1:]
data['charter_mode'] = data['charter'] * data['schoolmode']
data = data.set_index(['schoolcode', 'year'])

outcome_var = 'mathpass'
exog_vars = ['schoolmode', 'charter_mode', 'black', 'hispanic', 'lowincome', 'white']

model = PanelOLS(dependent = data[outcome_var], exog = data.loc[:, exog_vars], 
                 weights = data['totaltested'], entity_effects = True, time_effects = True)

results = model.fit(cov_type = 'clustered', clusters = data['districtcode'])
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:               mathpass   R-squared:                        0.0331
Estimator:                   PanelOLS   R-squared (Between):             -0.7172
No. Observations:               24734   R-squared (Within):              -0.0680
Date:                Thu, Sep 21 2023   R-squared (Overall):             -0.6938
Time:                        21:02:30   Log-likelihood                -8.717e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      116.25
Entities:                        4348   P-value                           0.0000
Avg Obs:                       5.6886   Distribution:                 F(6,20375)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             18.810
                            

In [15]:
data = pd.read_csv('final_data_all_state_mathpass.csv').iloc[:, 1:]
data['charter_virtual'] = data['charter'] * data['virtualper']
data['charter_hybrid'] = data['charter'] * data['hybridper']
data = data.set_index(['schoolcode', 'year'])

outcome_var = 'mathpass'
exog_vars = ['virtualper', 'hybridper', 'charter_virtual', 'charter_hybrid',
             'black', 'hispanic', 'lowincome', 'white']

model = PanelOLS(dependent = data[outcome_var], exog = data.loc[:, exog_vars], 
                 weights = data['totaltested'], entity_effects = True, time_effects = True)

results = model.fit(cov_type = 'clustered', clusters = data['districtcode'])
print(results)

                          PanelOLS Estimation Summary                           
Dep. Variable:               mathpass   R-squared:                        0.0492
Estimator:                   PanelOLS   R-squared (Between):             -0.6479
No. Observations:               24734   R-squared (Within):              -0.0880
Date:                Thu, Sep 21 2023   R-squared (Overall):             -0.6277
Time:                        21:10:05   Log-likelihood                -8.697e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      131.82
Entities:                        4348   P-value                           0.0000
Avg Obs:                       5.6886   Distribution:                 F(8,20373)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             23.908
                            

## School FE + district-year interaction

### Base regression - no interactions

In [30]:
data = pd.read_csv('final_data_all_state_mathpass.csv').iloc[:, 1:].head(50)
data['district_year'] = pd.Categorical(data['districtcode'] + data['year'].astype('str'))
data = data.set_index(['schoolcode', 'year'])

effect = 'district_year'
outcome_var = 'mathpass'
exog_vars = ['schoolmode', 'black', 'hispanic', 'lowincome', 'white']

model = PanelOLS(dependent = data[outcome_var], exog = data.loc[:, exog_vars], 
                 weights = data['totaltested'], entity_effects = True, time_effects = True, 
                 other_effects = data[effect])

results = model.fit(cov_type = 'clustered', clusters = data['districtcode'])
print(results)

ValueError: At most two effects supported.