In [1]:
import pandas as pd
from linearmodels.panel import PanelOLS
from linearmodels.iv import AbsorbingLS

In [2]:
def print_summarized_results(results):
    """Print a three-column coefficient table for a fitted model.

    Parameters
    ----------
    results : object
        Fitted-model results exposing ``params``, ``std_errors`` and ``pvalues``
        (each must support ``.round`` where it is rounded below).

    Returns
    -------
    None
        The table is written to stdout; nothing is returned.
    """
    # Estimates are shown at full precision; standard errors and p-values are
    # rounded to 5 decimal places.
    summary_columns = {
        'Parameter Estimates': results.params,
        'Standard Errors': results.std_errors.round(5),
        'P-values': results.pvalues.round(5),
    }

    print(pd.DataFrame(summary_columns))

# School FE

### Preliminary Regression Function (No Interactions)

In [3]:
def run_preliminary_regression(outcome_var, weight):
    """Fit the two preliminary (no-interaction) panel regressions for one outcome.

    Reads ``../final_data_all_state/final_data_all_state_{outcome_var}.csv``,
    drops its first column and indexes the rows by ``['schoolcode', 'year']``.
    Two weighted ``PanelOLS`` models (``entity_effects=True``,
    ``time_effects=True``) are fitted on that frame, with clustered standard
    errors using ``districtcode`` as the cluster variable.

    Parameters
    ----------
    outcome_var : str
        Dependent-variable column; also the suffix of the CSV file name.
    weight : str
        Column passed to the model as ``weights``.

    Returns
    -------
    list
        ``[panel_results_no_per, panel_results_per]``: the fit using
        ``schoolmode`` first, then the fit using ``virtualper`` and
        ``hybridper``. Both include ``black``, ``hispanic``, ``lowincome``
        and ``white`` as additional regressors.
    """
    # Read once: nothing below modifies the frame, so both fits can share it.
    data = (
        pd.read_csv(f'../final_data_all_state/final_data_all_state_{outcome_var}.csv')
        .iloc[:, 1:]
        .set_index(['schoolcode', 'year'])
    )
    controls = ['black', 'hispanic', 'lowincome', 'white']

    def _fit(exog_vars):
        # Same model settings for both specifications; only the regressors vary.
        model = PanelOLS(dependent=data[outcome_var], exog=data.loc[:, exog_vars],
                         weights=data[weight], entity_effects=True, time_effects=True)
        return model.fit(cov_type='clustered', clusters=data['districtcode'])

    panel_results_no_per = _fit(['schoolmode'] + controls)
    panel_results_per = _fit(['virtualper', 'hybridper'] + controls)

    return [panel_results_no_per, panel_results_per]

#### MATH

In [4]:
# Fit both preliminary specifications with 'mathpass' as the outcome and
# 'totaltested' as the weight column. `results` is rebound by every later fit
# cell, so the display cells that follow must run before the next fit.
results = run_preliminary_regression('mathpass', 'totaltested')

In [5]:
# results[0] is the fit whose regressors include the single `schoolmode` variable.
MATH_NOINT_NOPER = results[0]
print_summarized_results(MATH_NOINT_NOPER)

            Parameter Estimates  Standard Errors  P-values
schoolmode             7.937898          1.90246   0.00003
black                 -0.408005          0.05607   0.00000
hispanic              -0.222579          0.04658   0.00000
lowincome             -0.046789          0.02930   0.11030
white                 -0.029814          0.04407   0.49870


In [6]:
# results[1] is the fit whose regressors include `virtualper` and `hybridper`.
MATH_NOINT_PER = results[1]
print_summarized_results(MATH_NOINT_PER)

            Parameter Estimates  Standard Errors  P-values
virtualper             6.013950          1.97230   0.00230
hybridper             14.115143          1.84865   0.00000
black                 -0.392413          0.05586   0.00000
hispanic              -0.215000          0.04632   0.00000
lowincome             -0.046255          0.02800   0.09851
white                 -0.013614          0.04422   0.75821


#### ELA

In [7]:
# Refit both preliminary specifications with 'elapass' as the outcome; this
# replaces the math fits previously held in `results`.
results = run_preliminary_regression('elapass', 'totaltested')

In [8]:
# results[0] is the fit whose regressors include the single `schoolmode` variable.
ELA_NOINT_NOPER = results[0]
print_summarized_results(ELA_NOINT_NOPER)

            Parameter Estimates  Standard Errors  P-values
schoolmode            -3.222789          0.77700   0.00003
black                 -0.464732          0.08860   0.00000
hispanic              -0.340029          0.06455   0.00000
lowincome             -0.019332          0.01700   0.25551
white                  0.132937          0.04030   0.00097


In [9]:
# results[1] is the fit whose regressors include `virtualper` and `hybridper`.
ELA_NOINT_PER = results[1]
print_summarized_results(ELA_NOINT_PER)

            Parameter Estimates  Standard Errors  P-values
virtualper            -2.060941          0.77918   0.00818
hybridper             -8.139389          1.02333   0.00000
black                 -0.467170          0.08816   0.00000
hispanic              -0.338219          0.06401   0.00000
lowincome             -0.019323          0.01600   0.22731
white                  0.121126          0.03910   0.00195


#### DROPOUT

# School FE + State-Year Interaction

### School FE + State-Year Regression Function

In [10]:
def run_state_year_regression(outcome_var, weight):
    """Fit the two regressions that absorb school, year and state-year effects.

    Reads ``../final_data_all_state/final_data_all_state_{outcome_var}.csv``,
    drops its first column, and builds a ``state_year`` key by concatenating
    ``state`` with the string form of ``year``. Two weighted ``AbsorbingLS``
    models are fitted, each absorbing ``schoolcode``, ``year`` and
    ``state_year`` (cast to ``category``), with clustered standard errors
    using ``schoolcode`` as the cluster variable.

    Parameters
    ----------
    outcome_var : str
        Dependent-variable column; also the suffix of the CSV file name.
    weight : str
        Column passed to the model as ``weights``.

    Returns
    -------
    list
        ``[panel_results_no_per, panel_results_per]``: the fit using
        ``schoolmode`` first, then the fit using ``virtualper`` and
        ``hybridper``. Both include ``black``, ``hispanic``, ``lowincome``
        and ``white`` as additional regressors.
    """
    # Read and prepare once: both fits use the same rows and absorbed effects.
    data = pd.read_csv(f'../final_data_all_state/final_data_all_state_{outcome_var}.csv').iloc[:, 1:]
    data['state_year'] = data['state'] + data['year'].astype('str')

    absorb = data[['schoolcode', 'year', 'state_year']].astype('category')
    controls = ['black', 'hispanic', 'lowincome', 'white']

    def _fit(exog_vars):
        # Same model settings for both specifications; only the regressors vary.
        model = AbsorbingLS(dependent=data[outcome_var], exog=data[exog_vars],
                            absorb=absorb, weights=data[weight])
        return model.fit(cov_type='clustered', clusters=data['schoolcode'])

    panel_results_no_per = _fit(['schoolmode'] + controls)
    panel_results_per = _fit(['virtualper', 'hybridper'] + controls)

    return [panel_results_no_per, panel_results_per]

#### MATH

In [11]:
# Fit both state-year specifications with 'mathpass' as the outcome and
# 'totaltested' as the weight column; this rebinds `results`.
results = run_state_year_regression('mathpass', 'totaltested')

In [12]:
# results[0] is the fit whose regressors include the single `schoolmode` variable.
MATH_STATEYEAR__NOPER = results[0]
print_summarized_results(MATH_STATEYEAR__NOPER)

            Parameter Estimates  Standard Errors  P-values
schoolmode            -8.948009          0.90532   0.00000
black                 -0.466955          0.04518   0.00000
hispanic              -0.271946          0.03474   0.00000
lowincome             -0.019455          0.01161   0.09377
white                 -0.005329          0.03185   0.86714


In [13]:
# results[1] is the virtualper/hybridper fit. Store it under a *_PER name so the
# schoolmode fit already held in MATH_STATEYEAR__NOPER is not overwritten.
MATH_STATEYEAR__PER = results[1]
print_summarized_results(MATH_STATEYEAR__PER)

            Parameter Estimates  Standard Errors  P-values
virtualper            -9.202422          0.92295   0.00000
hybridper             -7.246941          1.07611   0.00000
black                 -0.467133          0.04516   0.00000
hispanic              -0.271774          0.03471   0.00000
lowincome             -0.019108          0.01161   0.09989
white                 -0.005544          0.03185   0.86180


#### ELA

In [14]:
# Refit both state-year specifications with 'elapass' as the outcome; this
# replaces the math fits previously held in `results`.
results = run_state_year_regression('elapass', 'totaltested')

In [15]:
# results[0] is the fit whose regressors include the single `schoolmode` variable.
ELA_STATEYEAR_NOPER = results[0]
print_summarized_results(ELA_STATEYEAR_NOPER)

            Parameter Estimates  Standard Errors  P-values
schoolmode            -0.543339          0.49857   0.27581
black                 -0.542196          0.05558   0.00000
hispanic              -0.372745          0.04434   0.00000
lowincome             -0.010435          0.00786   0.18445
white                  0.095805          0.02967   0.00124


In [16]:
# results[1] is the fit whose regressors include `virtualper` and `hybridper`.
ELA_STATEYEAR__PER = results[1]
print_summarized_results(ELA_STATEYEAR__PER)

            Parameter Estimates  Standard Errors  P-values
virtualper            -0.388614          0.50360   0.44031
hybridper              1.369056          0.79252   0.08408
black                 -0.542342          0.05560   0.00000
hispanic              -0.372953          0.04439   0.00000
lowincome             -0.010684          0.00785   0.17346
white                  0.096054          0.02969   0.00122


#### DROPOUT

# School FE + District-Year Interaction

### School FE + District-Year Regression Function

In [17]:
def run_district_year_regression(outcome_var, weight):
    """Fit the two regressions that absorb school, year and district-year effects.

    Reads ``../final_data_all_state/final_data_all_state_{outcome_var}.csv``,
    drops its first column, and builds a ``district_year`` key by concatenating
    ``districtcode`` with the string form of ``year``. Two weighted
    ``AbsorbingLS`` models are fitted, each absorbing ``schoolcode``, ``year``
    and ``district_year`` (cast to ``category``), with clustered standard
    errors using ``schoolcode`` as the cluster variable.

    Parameters
    ----------
    outcome_var : str
        Dependent-variable column; also the suffix of the CSV file name.
    weight : str
        Column passed to the model as ``weights``.

    Returns
    -------
    list
        ``[panel_results_no_per, panel_results_per]``: the fit using
        ``schoolmode`` first, then the fit using ``virtualper`` and
        ``hybridper``. Both include ``black``, ``hispanic``, ``lowincome``
        and ``white`` as additional regressors.
    """
    # Read and prepare once: both fits use the same rows and absorbed effects.
    data = pd.read_csv(f'../final_data_all_state/final_data_all_state_{outcome_var}.csv').iloc[:, 1:]
    # NOTE(review): the concatenation requires `districtcode` to hold strings.
    data['district_year'] = data['districtcode'] + data['year'].astype('str')

    absorb = data[['schoolcode', 'year', 'district_year']].astype('category')
    controls = ['black', 'hispanic', 'lowincome', 'white']

    def _fit(exog_vars):
        # Same model settings for both specifications; only the regressors vary.
        model = AbsorbingLS(dependent=data[outcome_var], exog=data[exog_vars],
                            absorb=absorb, weights=data[weight])
        return model.fit(cov_type='clustered', clusters=data['schoolcode'])

    panel_results_no_per = _fit(['schoolmode'] + controls)
    panel_results_per = _fit(['virtualper', 'hybridper'] + controls)

    return [panel_results_no_per, panel_results_per]

#### MATH

In [18]:
# Fit both district-year specifications with 'mathpass' as the outcome and
# 'totaltested' as the weight column; this rebinds `results`.
results = run_district_year_regression('mathpass', 'totaltested')

In [19]:
# results[0] is the fit whose regressors include the single `schoolmode` variable.
MATH_DISTRICTYEAR__NOPER = results[0]
print_summarized_results(MATH_DISTRICTYEAR__NOPER)

            Parameter Estimates  Standard Errors  P-values
schoolmode             2.787333          1.86190   0.13438
black                 -0.492969          0.04593   0.00000
hispanic              -0.356298          0.03720   0.00000
lowincome             -0.032914          0.01812   0.06928
white                 -0.055144          0.03521   0.11727


In [20]:
# results[1] is the virtualper/hybridper fit. Store it under a *_PER name so the
# schoolmode fit already held in MATH_DISTRICTYEAR__NOPER is not overwritten.
MATH_DISTRICTYEAR__PER = results[1]
print_summarized_results(MATH_DISTRICTYEAR__PER)

            Parameter Estimates  Standard Errors  P-values
virtualper             3.120072          1.88272   0.09748
hybridper              7.240133          4.07947   0.07594
black                 -0.492509          0.04592   0.00000
hispanic              -0.356058          0.03720   0.00000
lowincome             -0.032671          0.01812   0.07140
white                 -0.055055          0.03519   0.11768


#### ELA

In [21]:
# Refit both district-year specifications with 'elapass' as the outcome; this
# replaces the math fits previously held in `results`.
results = run_district_year_regression('elapass', 'totaltested')

In [22]:
# results[0] is the fit whose regressors include the single `schoolmode` variable.
ELA_DISTRICTYEAR_NOPER = results[0]
print_summarized_results(ELA_DISTRICTYEAR_NOPER)

            Parameter Estimates  Standard Errors  P-values
schoolmode             1.570688          1.64743   0.34038
black                 -0.378346          0.06152   0.00000
hispanic              -0.310484          0.05918   0.00000
lowincome             -0.091577          0.01733   0.00000
white                  0.047133          0.05260   0.37019


In [23]:
# results[1] is the fit whose regressors include `virtualper` and `hybridper`.
ELA_DISTRICTYEAR__PER = results[1]
print_summarized_results(ELA_DISTRICTYEAR__PER)

            Parameter Estimates  Standard Errors  P-values
virtualper             1.982812          1.66078   0.23252
hybridper              7.054339          3.36015   0.03578
black                 -0.377494          0.06148   0.00000
hispanic              -0.309827          0.05915   0.00000
lowincome             -0.091272          0.01732   0.00000
white                  0.047470          0.05257   0.36649


#### DROPOUT

# School FE + County-Year Interaction

### School FE + County-Year Regression Function

In [24]:
def run_county_year_regression(outcome_var, weight):
    """Fit the two regressions that absorb school, year and county-year effects.

    Reads ``../final_data_all_state/final_data_all_state_{outcome_var}.csv``,
    drops its first column, and builds a ``county_year`` key by concatenating
    ``countycode`` with the string form of ``year``. Two weighted
    ``AbsorbingLS`` models are fitted, each absorbing ``schoolcode``, ``year``
    and ``county_year`` (cast to ``category``), with clustered standard errors
    using ``schoolcode`` as the cluster variable.

    Parameters
    ----------
    outcome_var : str
        Dependent-variable column; also the suffix of the CSV file name.
    weight : str
        Column passed to the model as ``weights``.

    Returns
    -------
    list
        ``[panel_results_no_per, panel_results_per]``: the fit using
        ``schoolmode`` first, then the fit using ``virtualper`` and
        ``hybridper``. Both include ``black``, ``hispanic``, ``lowincome``
        and ``white`` as additional regressors.
    """
    # Read and prepare once: both fits use the same rows and absorbed effects.
    data = pd.read_csv(f'../final_data_all_state/final_data_all_state_{outcome_var}.csv').iloc[:, 1:]
    # NOTE(review): the concatenation requires `countycode` to hold strings.
    data['county_year'] = data['countycode'] + data['year'].astype('str')

    absorb = data[['schoolcode', 'year', 'county_year']].astype('category')
    controls = ['black', 'hispanic', 'lowincome', 'white']

    def _fit(exog_vars):
        # Same model settings for both specifications; only the regressors vary.
        model = AbsorbingLS(dependent=data[outcome_var], exog=data[exog_vars],
                            absorb=absorb, weights=data[weight])
        return model.fit(cov_type='clustered', clusters=data['schoolcode'])

    panel_results_no_per = _fit(['schoolmode'] + controls)
    panel_results_per = _fit(['virtualper', 'hybridper'] + controls)

    return [panel_results_no_per, panel_results_per]

#### MATH

In [25]:
# Fit both county-year specifications with 'mathpass' as the outcome and
# 'totaltested' as the weight column; this rebinds `results`.
results = run_county_year_regression('mathpass', 'totaltested')

In [26]:
# results[0] is the fit whose regressors include the single `schoolmode` variable.
MATH_COUNTYYEAR__NOPER = results[0]
print_summarized_results(MATH_COUNTYYEAR__NOPER)

            Parameter Estimates  Standard Errors  P-values
schoolmode            -2.196939          1.07283   0.04058
black                 -0.468591          0.04593   0.00000
hispanic              -0.328953          0.03713   0.00000
lowincome             -0.018612          0.01228   0.12949
white                 -0.064818          0.03355   0.05334


In [27]:
# results[1] is the virtualper/hybridper fit. Store it under a *_PER name so the
# schoolmode fit already held in MATH_COUNTYYEAR__NOPER is not overwritten.
MATH_COUNTYYEAR__PER = results[1]
print_summarized_results(MATH_COUNTYYEAR__PER)

            Parameter Estimates  Standard Errors  P-values
virtualper            -2.194687          1.10198   0.04642
hybridper             -1.146561          1.14358   0.31605
black                 -0.468600          0.04593   0.00000
hispanic              -0.328956          0.03713   0.00000
lowincome             -0.018615          0.01228   0.12943
white                 -0.064826          0.03355   0.05331


#### ELA

In [28]:
# Refit both county-year specifications with 'elapass' as the outcome; this
# replaces the math fits previously held in `results`.
results = run_county_year_regression('elapass', 'totaltested')

In [29]:
# results[0] is the fit whose regressors include the single `schoolmode` variable.
ELA_COUNTYYEAR_NOPER = results[0]
print_summarized_results(ELA_COUNTYYEAR_NOPER)

            Parameter Estimates  Standard Errors  P-values
schoolmode            -0.457448          0.67461   0.49771
black                 -0.553335          0.05627   0.00000
hispanic              -0.420568          0.04535   0.00000
lowincome             -0.025892          0.00897   0.00392
white                  0.018193          0.02987   0.54244


In [30]:
# results[1] is the fit whose regressors include `virtualper` and `hybridper`.
ELA_COUNTYYEAR__PER = results[1]
print_summarized_results(ELA_COUNTYYEAR__PER)

            Parameter Estimates  Standard Errors  P-values
virtualper            -0.178748          0.68548   0.79428
hybridper              1.937417          0.94095   0.03949
black                 -0.553020          0.05628   0.00000
hispanic              -0.420347          0.04539   0.00000
lowincome             -0.025886          0.00897   0.00391
white                  0.018781          0.02986   0.52942


#### DROPOUT

# School FE + District-Year Interaction + Race Interactions

### School FE + District-Year Interaction + Race Interactions Regression Function

In [32]:
def run_district_race_year_regression(outcome_var, weight, race):
    """Fit the district-year regressions with interaction terms for one column.

    Reads ``../final_data_all_state/final_data_all_state_{outcome_var}.csv``,
    drops its first column, and builds a ``district_year`` key by concatenating
    ``districtcode`` with the string form of ``year``. Three product columns
    are added: ``{race}_int`` (``race`` x ``schoolmode``),
    ``{race}_virtual_per`` (``race`` x ``virtualper``) and
    ``{race}_hybrid_per`` (``race`` x ``hybridper``). Two weighted
    ``AbsorbingLS`` models are fitted, each absorbing ``schoolcode``, ``year``
    and ``district_year`` (cast to ``category``), with clustered standard
    errors using ``schoolcode`` as the cluster variable.

    Parameters
    ----------
    outcome_var : str
        Dependent-variable column; also the suffix of the CSV file name.
    weight : str
        Column passed to the model as ``weights``.
    race : str
        Column multiplied with the mode variables to form the interaction
        regressors; also the prefix of the new column names.

    Returns
    -------
    list
        ``[panel_results_no_per, panel_results_per]``: the fit using
        ``schoolmode`` and ``{race}_int`` first, then the fit using
        ``virtualper``, ``hybridper`` and their two interactions. Both include
        ``black``, ``hispanic``, ``lowincome`` and ``white``.
    """
    # Read and prepare once. Each fit selects its regressors by name, so the
    # interaction columns meant for the other specification are simply unused.
    data = pd.read_csv(f'../final_data_all_state/final_data_all_state_{outcome_var}.csv').iloc[:, 1:]
    # NOTE(review): the concatenation requires `districtcode` to hold strings.
    data['district_year'] = data['districtcode'] + data['year'].astype('str')
    data[f'{race}_int'] = data[race] * data['schoolmode']
    data[f'{race}_virtual_per'] = data[race] * data['virtualper']
    data[f'{race}_hybrid_per'] = data[race] * data['hybridper']

    absorb = data[['schoolcode', 'year', 'district_year']].astype('category')
    controls = ['black', 'hispanic', 'lowincome', 'white']

    def _fit(exog_vars):
        # Same model settings for both specifications; only the regressors vary.
        model = AbsorbingLS(dependent=data[outcome_var], exog=data[exog_vars],
                            absorb=absorb, weights=data[weight])
        return model.fit(cov_type='clustered', clusters=data['schoolcode'])

    panel_results_no_per = _fit(['schoolmode'] + controls + [f'{race}_int'])
    panel_results_per = _fit(
        ['virtualper', 'hybridper'] + controls + [f'{race}_virtual_per', f'{race}_hybrid_per']
    )

    return [panel_results_no_per, panel_results_per]

#### MATH - BLACK

In [33]:
# Fit both district-year specifications with `black` interaction terms, using
# 'mathpass' as the outcome and 'totaltested' as the weight column.
results = run_district_race_year_regression('mathpass', 'totaltested', 'black')

In [34]:
# results[0] is the `schoolmode` fit, which adds `black_int` (black x schoolmode).
MATH_DISTRICTYEAR_BLACK_NOPER = results[0]
print_summarized_results(MATH_DISTRICTYEAR_BLACK_NOPER)

            Parameter Estimates  Standard Errors  P-values
schoolmode             2.584783          1.89355   0.17224
black                 -0.493364          0.04597   0.00000
hispanic              -0.356305          0.03721   0.00000
lowincome             -0.033086          0.01813   0.06800
white                 -0.055281          0.03522   0.11647
black_int              0.012170          0.01895   0.52083


In [35]:
# results[1] is the `virtualper`/`hybridper` fit, which adds `black_virtual_per`
# and `black_hybrid_per` (black x each of the two mode variables).
MATH_DISTRICTYEAR_BLACK_PER = results[1]
print_summarized_results(MATH_DISTRICTYEAR_BLACK_PER)

                   Parameter Estimates  Standard Errors  P-values
virtualper                    3.441647          1.91219   0.07189
hybridper                     5.265981          4.11034   0.20014
black                        -0.492471          0.04595   0.00000
hispanic                     -0.355734          0.03720   0.00000
lowincome                    -0.033158          0.01813   0.06740
white                        -0.055098          0.03520   0.11748
black_virtual_per            -0.017899          0.02590   0.48958
black_hybrid_per              0.109442          0.04910   0.02582


#### ELA - BLACK

In [36]:
# Refit both `black`-interaction specifications with 'elapass' as the outcome;
# this replaces the math fits previously held in `results`.
results = run_district_race_year_regression('elapass', 'totaltested', 'black')

In [37]:
# results[0] is the `schoolmode` fit, which adds `black_int` (black x schoolmode).
ELA_DISTRICTYEAR_BLACK_NOPER = results[0]
print_summarized_results(ELA_DISTRICTYEAR_BLACK_NOPER)

            Parameter Estimates  Standard Errors  P-values
schoolmode             1.369746          1.71837   0.42538
black                 -0.378633          0.06153   0.00000
hispanic              -0.310346          0.05919   0.00000
lowincome             -0.091756          0.01733   0.00000
white                  0.047206          0.05260   0.36952
black_int              0.012435          0.01617   0.44182


In [38]:
# results[1] is the `virtualper`/`hybridper` fit, which adds `black_virtual_per`
# and `black_hybrid_per` (black x each of the two mode variables).
ELA_DISTRICTYEAR_BLACK_PER = results[1]
print_summarized_results(ELA_DISTRICTYEAR_BLACK_PER)

                   Parameter Estimates  Standard Errors  P-values
virtualper                    2.038927          1.78661   0.25378
hybridper                     6.033871          3.50546   0.08520
black                        -0.377862          0.06150   0.00000
hispanic                     -0.309567          0.05915   0.00000
lowincome                    -0.091733          0.01733   0.00000
white                         0.047264          0.05257   0.36865
black_virtual_per            -0.002489          0.02078   0.90468
black_hybrid_per              0.058002          0.04227   0.17000


#### DROPOUT