In [1]:
import pandas as pd
from linearmodels.panel import PanelOLS
from linearmodels.iv import AbsorbingLS

In [2]:
def print_summarized_results(results, summary_only=False):
    """Print a fitted model's results, either in full or as a compact table.

    Parameters
    ----------
    results : object
        Fitted results object. When ``summary_only`` is true it must expose
        ``params``, ``std_errors`` and ``pvalues`` (the latter two supporting
        ``.round``).
    summary_only : bool, default False
        If False, print ``results`` itself (its full text summary).
        If True, print only a table of parameter estimates, standard errors
        and p-values, with standard errors and p-values rounded to 5 decimals.

    Returns
    -------
    None
    """
    if not summary_only:
        print(results)
        return

    # Compact view: coefficients, standard errors and p-values only.
    results_df = pd.DataFrame({
        'Parameter Estimates': results.params,
        'Standard Errors': results.std_errors.round(5),
        'P-values': results.pvalues.round(5),
    })
    print(results_df)

# School FE

### Preliminary No Interactions Regression Function

In [3]:
def run_preliminary_regression(outcome_var, weight):
    """Fit the two baseline weighted panel regressions for one outcome.

    Reads ``../final_data_all_state/final_data_all_state_{outcome_var}.csv``,
    drops its first column and indexes the rows by ``(schoolcode, year)``, so
    ``PanelOLS`` entity effects are per ``schoolcode`` and time effects are
    per ``year``.

    Parameters
    ----------
    outcome_var : str
        Name of the dependent-variable column; also selects the CSV file.
    weight : str
        Name of the column used as regression weights.

    Returns
    -------
    list
        ``[panel_results_no_per, panel_results_per]``: the fit using
        ``schoolmode`` as the regressor of interest, followed by the fit using
        ``virtualper`` and ``hybridper``. Both include the controls ``black``,
        ``hispanic``, ``lowincome`` and ``white``.
    """
    # Both specifications use the same rows, so the file is read only once.
    data = (
        pd.read_csv(f'../final_data_all_state/final_data_all_state_{outcome_var}.csv')
        .iloc[:, 1:]  # presumably drops a saved index column -- TODO confirm
        .set_index(['schoolcode', 'year'])
    )

    controls = ['black', 'hispanic', 'lowincome', 'white']
    exog_specs = [
        ['schoolmode'] + controls,
        ['virtualper', 'hybridper'] + controls,
    ]

    fitted = []
    for exog_vars in exog_specs:
        model = PanelOLS(dependent=data[outcome_var], exog=data.loc[:, exog_vars],
                         weights=data[weight], entity_effects=True, time_effects=True)
        # NOTE(review): clustered by district here, while the AbsorbingLS-based
        # functions in this notebook cluster by school -- confirm intended.
        fitted.append(model.fit(cov_type='clustered', clusters=data['districtcode']))

    return fitted

#### MATH

In [4]:
results = run_preliminary_regression('mathpass', 'totaltested')

In [5]:
MATH_NOINT_NOPER = results[0]
print_summarized_results(MATH_NOINT_NOPER)

                          PanelOLS Estimation Summary                           
Dep. Variable:               mathpass   R-squared:                        0.0323
Estimator:                   PanelOLS   R-squared (Between):             -0.7190
No. Observations:               24734   R-squared (Within):              -0.0678
Date:                Fri, Sep 22 2023   R-squared (Overall):             -0.6955
Time:                        17:26:43   Log-likelihood                -8.718e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      136.04
Entities:                        4348   P-value                           0.0000
Avg Obs:                       5.6886   Distribution:                 F(5,20376)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             22.509
                            

In [6]:
MATH_NOINT_PER = results[1]
print_summarized_results(MATH_NOINT_PER)

                          PanelOLS Estimation Summary                           
Dep. Variable:               mathpass   R-squared:                        0.0453
Estimator:                   PanelOLS   R-squared (Between):             -0.6614
No. Observations:               24734   R-squared (Within):              -0.0913
Date:                Fri, Sep 22 2023   R-squared (Overall):             -0.6408
Time:                        17:26:43   Log-likelihood                -8.702e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      161.12
Entities:                        4348   P-value                           0.0000
Avg Obs:                       5.6886   Distribution:                 F(6,20375)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             28.516
                            

#### ELA

In [7]:
results = run_preliminary_regression('elapass', 'totaltested')

In [8]:
ELA_NOINT_NOPER = results[0]
print_summarized_results(ELA_NOINT_NOPER)

                          PanelOLS Estimation Summary                           
Dep. Variable:                elapass   R-squared:                        0.0562
Estimator:                   PanelOLS   R-squared (Between):             -0.7668
No. Observations:               18554   R-squared (Within):               0.0106
Date:                Fri, Sep 22 2023   R-squared (Overall):             -0.7581
Time:                        17:26:44   Log-likelihood                -5.778e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      181.18
Entities:                        3318   P-value                           0.0000
Avg Obs:                       5.5919   Distribution:                 F(5,15226)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             15.869
                            

In [9]:
ELA_NOINT_PER = results[1]
print_summarized_results(ELA_NOINT_PER)

                          PanelOLS Estimation Summary                           
Dep. Variable:                elapass   R-squared:                        0.0675
Estimator:                   PanelOLS   R-squared (Between):             -0.7819
No. Observations:               18554   R-squared (Within):               0.0122
Date:                Fri, Sep 22 2023   R-squared (Overall):             -0.7730
Time:                        17:26:44   Log-likelihood                -5.767e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      183.67
Entities:                        3318   P-value                           0.0000
Avg Obs:                       5.5919   Distribution:                 F(6,15225)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             20.807
                            

#### DROPOUT

# School FE + State-Year Interaction

### School FE + State-Year Regression Function

In [10]:
def run_state_year_regression(outcome_var, weight):
    """Fit the two weighted regressions absorbing school, year and state-year.

    Reads ``../final_data_all_state/final_data_all_state_{outcome_var}.csv``,
    drops its first column, and builds a ``state_year`` key by concatenating
    ``state`` with the string form of ``year``. ``schoolcode``, ``year`` and
    ``state_year`` are passed to ``AbsorbingLS`` as categorical absorbed
    effects. Standard errors are clustered on ``schoolcode``.

    Parameters
    ----------
    outcome_var : str
        Name of the dependent-variable column; also selects the CSV file.
    weight : str
        Name of the column used as regression weights.

    Returns
    -------
    list
        ``[panel_results_no_per, panel_results_per]``: the fit using
        ``schoolmode``, followed by the fit using ``virtualper`` and
        ``hybridper``. Both include the controls ``black``, ``hispanic``,
        ``lowincome`` and ``white``.
    """
    # Both specifications use the same rows, so the file is read only once.
    data = pd.read_csv(f'../final_data_all_state/final_data_all_state_{outcome_var}.csv').iloc[:, 1:]
    data['state_year'] = data['state'] + data['year'].astype('str')

    absorb = data[['schoolcode', 'year', 'state_year']].astype('category')

    controls = ['black', 'hispanic', 'lowincome', 'white']
    exog_specs = [
        ['schoolmode'] + controls,
        ['virtualper', 'hybridper'] + controls,
    ]

    fitted = []
    for exog_vars in exog_specs:
        model = AbsorbingLS(dependent=data[outcome_var], exog=data[exog_vars],
                            absorb=absorb, weights=data[weight])
        fitted.append(model.fit(cov_type='clustered', clusters=data['schoolcode']))

    return fitted

#### MATH

In [11]:
results = run_state_year_regression('mathpass', 'totaltested')

In [12]:
MATH_STATEYEAR__NOPER = results[0]
print_summarized_results(MATH_STATEYEAR__NOPER)

                         Absorbing LS Estimation Summary                          
Dep. Variable:               mathpass   R-squared:                          0.9182
Estimator:               Absorbing LS   Adj. R-squared:                     0.9007
No. Observations:               24734   F-statistic:                        291.53
Date:                Fri, Sep 22 2023   P-value (F-stat):                   0.0000
Time:                        17:26:45   Distribution:                      chi2(5)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0319
                                        Varaibles Absorbed:                 4368.0
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
schoolmode    -8.9480     0.9053    -9.8838     0.0000     -10.722     -7.1736
black         -0.467

In [13]:
# results[1] is the virtualper/hybridper fit; it gets its own name so the
# schoolmode fit held in MATH_STATEYEAR__NOPER is not overwritten.
MATH_STATEYEAR__PER = results[1]
print_summarized_results(MATH_STATEYEAR__PER)

                         Absorbing LS Estimation Summary                          
Dep. Variable:               mathpass   R-squared:                          0.9183
Estimator:               Absorbing LS   Adj. R-squared:                     0.9007
No. Observations:               24734   F-statistic:                        294.08
Date:                Fri, Sep 22 2023   P-value (F-stat):                   0.0000
Time:                        17:26:45   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0323
                                        Varaibles Absorbed:                 4368.0
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
virtualper    -9.2024     0.9230    -9.9706     0.0000     -11.011     -7.3935
hybridper     -7.246

#### ELA

In [14]:
results = run_state_year_regression('elapass', 'totaltested')

In [15]:
ELA_STATEYEAR_NOPER = results[0]
print_summarized_results(ELA_STATEYEAR_NOPER)

                         Absorbing LS Estimation Summary                          
Dep. Variable:                elapass   R-squared:                          0.9393
Estimator:               Absorbing LS   Adj. R-squared:                     0.9260
No. Observations:               18554   F-statistic:                        177.92
Date:                Fri, Sep 22 2023   P-value (F-stat):                   0.0000
Time:                        17:26:45   Distribution:                      chi2(5)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0556
                                        Varaibles Absorbed:                 3338.0
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
schoolmode    -0.5433     0.4986    -1.0898     0.2758     -1.5205      0.4338
black         -0.542

In [16]:
ELA_STATEYEAR__PER = results[1]
print_summarized_results(ELA_STATEYEAR__PER)

                         Absorbing LS Estimation Summary                          
Dep. Variable:                elapass   R-squared:                          0.9393
Estimator:               Absorbing LS   Adj. R-squared:                     0.9260
No. Observations:               18554   F-statistic:                        180.84
Date:                Fri, Sep 22 2023   P-value (F-stat):                   0.0000
Time:                        17:26:45   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0559
                                        Varaibles Absorbed:                 3338.0
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
virtualper    -0.3886     0.5036    -0.7717     0.4403     -1.3756      0.5984
hybridper      1.369

#### DROPOUT

# School FE + District-Year Interaction

### School FE + District-Year Regression Function

In [17]:
def run_district_year_regression(outcome_var, weight):
    """Fit the two weighted regressions absorbing school, year and district-year.

    Reads ``../final_data_all_state/final_data_all_state_{outcome_var}.csv``,
    drops its first column, and builds a ``district_year`` key by
    concatenating ``districtcode`` with the string form of ``year``.
    ``schoolcode``, ``year`` and ``district_year`` are passed to
    ``AbsorbingLS`` as categorical absorbed effects. Standard errors are
    clustered on ``schoolcode``.

    Parameters
    ----------
    outcome_var : str
        Name of the dependent-variable column; also selects the CSV file.
    weight : str
        Name of the column used as regression weights.

    Returns
    -------
    list
        ``[panel_results_no_per, panel_results_per]``: the fit using
        ``schoolmode``, followed by the fit using ``virtualper`` and
        ``hybridper``. Both include the controls ``black``, ``hispanic``,
        ``lowincome`` and ``white``.
    """
    # Both specifications use the same rows, so the file is read only once.
    data = pd.read_csv(f'../final_data_all_state/final_data_all_state_{outcome_var}.csv').iloc[:, 1:]
    # The `+` relies on districtcode being string-typed -- TODO confirm.
    data['district_year'] = data['districtcode'] + data['year'].astype('str')

    absorb = data[['schoolcode', 'year', 'district_year']].astype('category')

    controls = ['black', 'hispanic', 'lowincome', 'white']
    exog_specs = [
        ['schoolmode'] + controls,
        ['virtualper', 'hybridper'] + controls,
    ]

    fitted = []
    for exog_vars in exog_specs:
        model = AbsorbingLS(dependent=data[outcome_var], exog=data[exog_vars],
                            absorb=absorb, weights=data[weight])
        fitted.append(model.fit(cov_type='clustered', clusters=data['schoolcode']))

    return fitted

#### MATH

In [18]:
results = run_district_year_regression('mathpass', 'totaltested')

In [19]:
MATH_DISTRICTYEAR__NOPER = results[0]
print_summarized_results(MATH_DISTRICTYEAR__NOPER)

                         Absorbing LS Estimation Summary                          
Dep. Variable:               mathpass   R-squared:                          0.9613
Estimator:               Absorbing LS   Adj. R-squared:                     0.9066
No. Observations:               24734   F-statistic:                        229.46
Date:                Fri, Sep 22 2023   P-value (F-stat):                   0.0000
Time:                        17:26:46   Distribution:                      chi2(5)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0248
                                        Varaibles Absorbed:              1.449e+04
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
schoolmode     2.7873     1.8619     1.4970     0.1344     -0.8619      6.4366
black         -0.493

In [20]:
# results[1] is the virtualper/hybridper fit; it gets its own name so the
# schoolmode fit held in MATH_DISTRICTYEAR__NOPER is not overwritten.
MATH_DISTRICTYEAR__PER = results[1]
print_summarized_results(MATH_DISTRICTYEAR__PER)

                         Absorbing LS Estimation Summary                          
Dep. Variable:               mathpass   R-squared:                          0.9613
Estimator:               Absorbing LS   Adj. R-squared:                     0.9066
No. Observations:               24734   F-statistic:                        230.33
Date:                Fri, Sep 22 2023   P-value (F-stat):                   0.0000
Time:                        17:26:46   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0251
                                        Varaibles Absorbed:              1.449e+04
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
virtualper     3.1201     1.8827     1.6572     0.0975     -0.5700      6.8101
hybridper      7.240

#### ELA

In [21]:
results = run_district_year_regression('elapass', 'totaltested')

In [22]:
ELA_DISTRICTYEAR_NOPER = results[0]
print_summarized_results(ELA_DISTRICTYEAR_NOPER)

                         Absorbing LS Estimation Summary                          
Dep. Variable:                elapass   R-squared:                          0.9748
Estimator:               Absorbing LS   Adj. R-squared:                     0.9085
No. Observations:               18554   F-statistic:                        108.73
Date:                Fri, Sep 22 2023   P-value (F-stat):                   0.0000
Time:                        17:26:47   Distribution:                      chi2(5)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0385
                                        Varaibles Absorbed:              1.344e+04
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
schoolmode     1.5707     1.6474     0.9534     0.3404     -1.6582      4.7996
black         -0.378

In [23]:
ELA_DISTRICTYEAR__PER = results[1]
print_summarized_results(ELA_DISTRICTYEAR__PER)

                         Absorbing LS Estimation Summary                          
Dep. Variable:                elapass   R-squared:                          0.9748
Estimator:               Absorbing LS   Adj. R-squared:                     0.9086
No. Observations:               18554   F-statistic:                        114.25
Date:                Fri, Sep 22 2023   P-value (F-stat):                   0.0000
Time:                        17:26:47   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0391
                                        Varaibles Absorbed:              1.344e+04
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
virtualper     1.9828     1.6608     1.1939     0.2325     -1.2723      5.2379
hybridper      7.054

#### DROPOUT

# School FE + County-Year Interaction

### School FE + County-Year Regression Function

In [24]:
def run_county_year_regression(outcome_var, weight):
    """Fit the two weighted regressions absorbing school, year and county-year.

    Reads ``../final_data_all_state/final_data_all_state_{outcome_var}.csv``,
    drops its first column, and builds a ``county_year`` key by concatenating
    ``countycode`` with the string form of ``year``. ``schoolcode``, ``year``
    and ``county_year`` are passed to ``AbsorbingLS`` as categorical absorbed
    effects. Standard errors are clustered on ``schoolcode``.

    Parameters
    ----------
    outcome_var : str
        Name of the dependent-variable column; also selects the CSV file.
    weight : str
        Name of the column used as regression weights.

    Returns
    -------
    list
        ``[panel_results_no_per, panel_results_per]``: the fit using
        ``schoolmode``, followed by the fit using ``virtualper`` and
        ``hybridper``. Both include the controls ``black``, ``hispanic``,
        ``lowincome`` and ``white``.
    """
    # Both specifications use the same rows, so the file is read only once.
    data = pd.read_csv(f'../final_data_all_state/final_data_all_state_{outcome_var}.csv').iloc[:, 1:]
    # The `+` relies on countycode being string-typed -- TODO confirm.
    data['county_year'] = data['countycode'] + data['year'].astype('str')

    absorb = data[['schoolcode', 'year', 'county_year']].astype('category')

    controls = ['black', 'hispanic', 'lowincome', 'white']
    exog_specs = [
        ['schoolmode'] + controls,
        ['virtualper', 'hybridper'] + controls,
    ]

    fitted = []
    for exog_vars in exog_specs:
        model = AbsorbingLS(dependent=data[outcome_var], exog=data[exog_vars],
                            absorb=absorb, weights=data[weight])
        fitted.append(model.fit(cov_type='clustered', clusters=data['schoolcode']))

    return fitted

#### MATH

In [None]:
results = run_county_year_regression('mathpass', 'totaltested')

In [None]:
MATH_COUNTYYEAR__NOPER = results[0]
print_summarized_results(MATH_COUNTYYEAR__NOPER)

In [None]:
# results[1] is the virtualper/hybridper fit; it gets its own name so the
# schoolmode fit held in MATH_COUNTYYEAR__NOPER is not overwritten.
MATH_COUNTYYEAR__PER = results[1]
print_summarized_results(MATH_COUNTYYEAR__PER)

#### ELA

In [None]:
results = run_county_year_regression('elapass', 'totaltested')

In [None]:
ELA_COUNTYYEAR_NOPER = results[0]
print_summarized_results(ELA_COUNTYYEAR_NOPER)

In [None]:
ELA_COUNTYYEAR__PER = results[1]
print_summarized_results(ELA_COUNTYYEAR__PER)

#### DROPOUT

# School FE + District-Year Interaction + Race Interactions

### School FE + District-Year Interaction + Interactions Regression Function

In [None]:
def run_district_interaction_year_regression(outcome_var, weight, interaction):
    """Fit the district-year regressions with an added interaction term.

    Reads ``../final_data_all_state/final_data_all_state_{outcome_var}.csv``,
    drops its first column, and builds a ``district_year`` key by
    concatenating ``districtcode`` with the string form of ``year``.
    ``schoolcode``, ``year`` and ``district_year`` are passed to
    ``AbsorbingLS`` as categorical absorbed effects. Standard errors are
    clustered on ``schoolcode``.

    Parameters
    ----------
    outcome_var : str
        Name of the dependent-variable column; also selects the CSV file.
    weight : str
        Name of the column used as regression weights.
    interaction : str
        Name of the column multiplied with the learning-mode regressors to
        form the interaction terms.

    Returns
    -------
    list
        ``[panel_results_no_per, panel_results_per]``. The first fit uses
        ``schoolmode`` plus ``{interaction}_int``
        (``interaction * schoolmode``). The second uses ``virtualper`` and
        ``hybridper`` plus ``{interaction}_virtual_per`` and
        ``{interaction}_hybrid_per``. Both include the controls ``black``,
        ``hispanic``, ``lowincome`` and ``white``.
    """
    # Both specifications use the same rows, so the file is read only once.
    data = pd.read_csv(f'../final_data_all_state/final_data_all_state_{outcome_var}.csv').iloc[:, 1:]
    # The `+` relies on districtcode being string-typed -- TODO confirm.
    data['district_year'] = data['districtcode'] + data['year'].astype('str')

    # Interaction terms for both specifications; each fit selects only the
    # columns it lists, so the extra columns do not affect the other fit.
    data[f'{interaction}_int'] = data[interaction] * data['schoolmode']
    data[f'{interaction}_virtual_per'] = data[interaction] * data['virtualper']
    data[f'{interaction}_hybrid_per'] = data[interaction] * data['hybridper']

    absorb = data[['schoolcode', 'year', 'district_year']].astype('category')

    controls = ['black', 'hispanic', 'lowincome', 'white']
    exog_specs = [
        ['schoolmode'] + controls + [f'{interaction}_int'],
        ['virtualper', 'hybridper'] + controls
        + [f'{interaction}_virtual_per', f'{interaction}_hybrid_per'],
    ]

    fitted = []
    for exog_vars in exog_specs:
        model = AbsorbingLS(dependent=data[outcome_var], exog=data[exog_vars],
                            absorb=absorb, weights=data[weight])
        fitted.append(model.fit(cov_type='clustered', clusters=data['schoolcode']))

    return fitted

#### MATH - BLACK

In [None]:
results = run_district_interaction_year_regression('mathpass', 'totaltested', 'black')

In [None]:
MATH_DISTRICTYEAR_BLACK_NOPER = results[0]
print_summarized_results(MATH_DISTRICTYEAR_BLACK_NOPER)

In [None]:
MATH_DISTRICTYEAR_BLACK_PER = results[1]
print_summarized_results(MATH_DISTRICTYEAR_BLACK_PER)

#### ELA - BLACK

In [None]:
results = run_district_interaction_year_regression('elapass', 'totaltested', 'black')

In [None]:
ELA_DISTRICTYEAR_BLACK_NOPER = results[0]
print_summarized_results(ELA_DISTRICTYEAR_BLACK_NOPER)

In [None]:
ELA_DISTRICTYEAR_BLACK_PER = results[1]
print_summarized_results(ELA_DISTRICTYEAR_BLACK_PER)

#### DROPOUT - BLACK

#### ELA - HISPANIC

In [None]:
results = run_district_interaction_year_regression('elapass', 'totaltested', 'hispanic')

In [None]:
ELA_DISTRICTYEAR_HISPANIC_NOPER = results[0]
print_summarized_results(ELA_DISTRICTYEAR_HISPANIC_NOPER)

In [None]:
ELA_DISTRICTYEAR_HISPANIC_PER = results[1]
print_summarized_results(ELA_DISTRICTYEAR_HISPANIC_PER)

#### DROPOUT - HISPANIC

# School FE + District-Year Interaction + Other Interactions

### School FE + District-Year Interaction + Income Interactions Regression Function

#### MATH - LOWINCOME

In [None]:
results = run_district_interaction_year_regression('mathpass', 'totaltested', 'lowincome')

In [None]:
MATH_DISTRICTYEAR_LOWINCOME_NOPER = results[0]
print_summarized_results(MATH_DISTRICTYEAR_LOWINCOME_NOPER)

In [None]:
MATH_DISTRICTYEAR_LOWINCOME_PER = results[1]
print_summarized_results(MATH_DISTRICTYEAR_LOWINCOME_PER)

#### ELA - LOWINCOME

In [None]:
results = run_district_interaction_year_regression('elapass', 'totaltested', 'lowincome')

In [None]:
ELA_DISTRICTYEAR_LOWINCOME_NOPER = results[0]
print_summarized_results(ELA_DISTRICTYEAR_LOWINCOME_NOPER)

In [None]:
ELA_DISTRICTYEAR_LOWINCOME_PER = results[1]
print_summarized_results(ELA_DISTRICTYEAR_LOWINCOME_PER)

#### DROPOUT - LOWINCOME

#### MATH - CHARTER

In [None]:
results = run_district_interaction_year_regression('mathpass', 'totaltested', 'charter')

In [None]:
MATH_DISTRICTYEAR_CHARTER_NOPER = results[0]
print_summarized_results(MATH_DISTRICTYEAR_CHARTER_NOPER)

In [None]:
MATH_DISTRICTYEAR_CHARTER_PER = results[1]
print_summarized_results(MATH_DISTRICTYEAR_CHARTER_PER)

#### ELA - CHARTER

In [None]:
results = run_district_interaction_year_regression('elapass', 'totaltested', 'charter')

In [None]:
ELA_DISTRICTYEAR_CHARTER_NOPER = results[0]
print_summarized_results(ELA_DISTRICTYEAR_CHARTER_NOPER)

In [None]:
ELA_DISTRICTYEAR_CHARTER_PER = results[1]
print_summarized_results(ELA_DISTRICTYEAR_CHARTER_PER)

#### DROPOUT - CHARTER