In [1]:
import pandas as pd
from linearmodels.panel import PanelOLS
from linearmodels.iv import AbsorbingLS
import io

In [2]:
def print_results(results, compact=False):
    """Print a fitted model's results.

    Parameters
    ----------
    results : object
        Fitted results object. By default it is simply passed to ``print``.
        When ``compact`` is true it must expose ``params``, ``std_errors`` and
        ``pvalues`` attributes that pandas can assemble into one DataFrame.
    compact : bool, default False
        If True, print only a table of coefficients, standard errors and
        p-values (the latter two rounded to 5 decimals) instead of the full
        summary. This replaces the block that previously had to be
        uncommented by hand.
    """
    if compact:
        results_df = pd.DataFrame({
            'Parameter Estimates': results.params,
            'Standard Errors': results.std_errors.round(5),
            'P-values': results.pvalues.round(5)
        })
        print(results_df)
        return

    print(results)

# School FE

## Base regression - no interactions

In [92]:
def base_regression(outcome_var, weight):
    """Fit the two weighted fixed-effects specifications for one outcome.

    Reads ``final_data_all_state_{outcome_var}.csv``, discards its first
    column, and indexes the rows by ``(schoolcode, year)``. Both models are
    PanelOLS fits with entity and time effects, weighted by the ``weight``
    column, with standard errors clustered on ``districtcode``.

    Parameters
    ----------
    outcome_var : str
        Dependent-variable column; also selects which CSV file is read.
    weight : str
        Column used as observation weights.

    Returns
    -------
    list
        ``[schoolmode fit, virtualper/hybridper fit]`` in that order.
    """
    panel = (
        pd.read_csv(f'final_data_all_state_{outcome_var}.csv')
        .iloc[:, 1:]
        .set_index(['schoolcode', 'year'])
    )
    controls = ['black', 'hispanic', 'lowincome', 'white']

    def _fit(treatment_vars):
        # Treatment columns come first, followed by the shared controls.
        regressors = treatment_vars + controls
        spec = PanelOLS(
            dependent=panel[outcome_var],
            exog=panel.loc[:, regressors],
            weights=panel[weight],
            entity_effects=True,
            time_effects=True,
        )
        return spec.fit(cov_type='clustered', clusters=panel['districtcode'])

    # First the single schoolmode regressor, then the virtual/hybrid split.
    return [_fit(['schoolmode']), _fit(['virtualper', 'hybridper'])]

### Mathpass

In [93]:
results = base_regression('mathpass', 'totaltested')
print_results(results[0])
print_results(results[1])

                          PanelOLS Estimation Summary                           
Dep. Variable:               mathpass   R-squared:                        0.0323
Estimator:                   PanelOLS   R-squared (Between):             -0.7190
No. Observations:               24734   R-squared (Within):              -0.0678
Date:                Tue, Sep 26 2023   R-squared (Overall):             -0.6955
Time:                        21:51:13   Log-likelihood                -8.718e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      136.04
Entities:                        4348   P-value                           0.0000
Avg Obs:                       5.6886   Distribution:                 F(5,20376)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             22.509
                            

### Elapass

In [94]:
results = base_regression('elapass', 'totaltested')
print_results(results[0])
print_results(results[1])

                          PanelOLS Estimation Summary                           
Dep. Variable:                elapass   R-squared:                        0.0562
Estimator:                   PanelOLS   R-squared (Between):             -0.7668
No. Observations:               18554   R-squared (Within):               0.0106
Date:                Tue, Sep 26 2023   R-squared (Overall):             -0.7581
Time:                        21:51:30   Log-likelihood                -5.778e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      181.18
Entities:                        3318   P-value                           0.0000
Avg Obs:                       5.5919   Distribution:                 F(5,15226)
Min Obs:                       4.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             15.869
                            

### Drop-out

In [95]:
results = base_regression('dropout', 'totalenrolled')
print_results(results[0])
print_results(results[1])

                          PanelOLS Estimation Summary                           
Dep. Variable:                dropout   R-squared:                        0.0057
Estimator:                   PanelOLS   R-squared (Between):             -0.0880
No. Observations:               15568   R-squared (Within):               0.0093
Date:                Tue, Sep 26 2023   R-squared (Overall):             -0.0803
Time:                        21:51:45   Log-likelihood                -3.533e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      13.429
Entities:                        3892   P-value                           0.0000
Avg Obs:                       4.0000   Distribution:                 F(5,11668)
Min Obs:                       4.0000                                           
Max Obs:                       4.0000   F-statistic (robust):             2.9826
                            

# School FE + district-year interaction

## Base regression - no attribute interactions

In [96]:
def year_interaction_regression(outcome_var, weight, effect):
    """Fit both specifications while absorbing school, year and effect-by-year.

    Reads ``final_data_all_state_{outcome_var}.csv`` (first column dropped)
    and builds a ``year_interaction`` label by concatenating the ``effect``
    column with the year rendered as a string. The columns ``schoolcode``,
    ``year`` and ``year_interaction`` are cast to categoricals and absorbed
    by AbsorbingLS. Standard errors are clustered on ``schoolcode``.

    Parameters
    ----------
    outcome_var : str
        Dependent-variable column; also selects which CSV file is read.
    weight : str
        Column used as observation weights.
    effect : str
        Column combined with ``year`` to form the absorbed interaction.
        NOTE(review): it is added directly to a string Series, so it
        presumably holds strings - confirm against the data.

    Returns
    -------
    list
        ``[schoolmode fit, virtualper/hybridper fit]`` in that order.
    """
    frame = pd.read_csv(f'final_data_all_state_{outcome_var}.csv').iloc[:, 1:]
    frame['year_interaction'] = frame[effect] + frame['year'].astype('str')

    controls = ['black', 'hispanic', 'lowincome', 'white']
    absorbed_cols = ['schoolcode', 'year', 'year_interaction']

    fits = []
    # One pass for schoolmode, one for the virtualper / hybridper pair.
    for treatment_vars in (['schoolmode'], ['virtualper', 'hybridper']):
        absorb = frame[absorbed_cols].astype('category')
        spec = AbsorbingLS(
            dependent=frame[outcome_var],
            exog=frame[treatment_vars + controls],
            absorb=absorb,
            weights=frame[weight],
        )
        fits.append(spec.fit(cov_type='clustered', clusters=frame['schoolcode']))

    return fits

### Mathpass

In [97]:
results = year_interaction_regression('mathpass', 'totaltested', 'districtcode')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:               mathpass   R-squared:                          0.9613
Estimator:               Absorbing LS   Adj. R-squared:                     0.9066
No. Observations:               24734   F-statistic:                        229.46
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0000
Time:                        21:52:07   Distribution:                      chi2(5)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0248
                                        Varaibles Absorbed:              1.449e+04
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
schoolmode     2.7873     1.8619     1.4970     0.1344     -0.8619      6.4366
black         -0.493

### Elapass

In [98]:
results = year_interaction_regression('elapass', 'totaltested', 'districtcode')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:                elapass   R-squared:                          0.9748
Estimator:               Absorbing LS   Adj. R-squared:                     0.9085
No. Observations:               18554   F-statistic:                        108.73
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0000
Time:                        21:52:20   Distribution:                      chi2(5)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0385
                                        Varaibles Absorbed:              1.344e+04
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
schoolmode     1.5707     1.6474     0.9534     0.3404     -1.6582      4.7996
black         -0.378

### Drop-out

In [99]:
results = year_interaction_regression('dropout', 'totalenrolled', 'districtcode')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:                dropout   R-squared:                          0.9380
Estimator:               Absorbing LS   Adj. R-squared:                     0.7555
No. Observations:               15568   F-statistic:                        24.045
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0002
Time:                        21:52:30   Distribution:                      chi2(5)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0046
                                        Varaibles Absorbed:              1.161e+04
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
schoolmode    -4.1376     1.0190    -4.0606     0.0000     -6.1347     -2.1405
black          0.012

## Attribute interactions

In [100]:
def year_attribute_interaction_regression(outcome_var, weight, effect, interaction):
    """Fit both specifications with an added attribute-by-mode interaction.

    Same setup as the plain effect-by-year regression: the CSV for
    ``outcome_var`` is read (first column dropped), ``effect`` is concatenated
    with the stringified year into ``year_interaction``, and ``schoolcode``,
    ``year`` and ``year_interaction`` are absorbed as categoricals. Standard
    errors are clustered on ``schoolcode``.

    Three interaction columns are added to the data before fitting:
    ``{interaction}_int`` (attribute x schoolmode),
    ``{interaction}_virtual_int`` (attribute x virtualper) and
    ``{interaction}_hybrid_int`` (attribute x hybridper).

    Parameters
    ----------
    outcome_var : str
        Dependent-variable column; also selects which CSV file is read.
    weight : str
        Column used as observation weights.
    effect : str
        Column combined with ``year`` to form the absorbed interaction.
    interaction : str
        Attribute column to interact with the learning-mode variables. The
        special value ``'black_hispanic'`` uses the sum of the ``black`` and
        ``hispanic`` columns instead of a single column.

    Returns
    -------
    list
        ``[schoolmode fit, virtualper/hybridper fit]`` in that order.
    """
    frame = pd.read_csv(f'final_data_all_state_{outcome_var}.csv').iloc[:, 1:]
    frame['year_interaction'] = frame[effect] + frame['year'].astype('str')

    # 'black_hispanic' is not a column of its own; it is the sum of the two.
    if interaction == 'black_hispanic':
        attribute = frame['black'] + frame['hispanic']
    else:
        attribute = frame[interaction]

    mode_by_suffix = (
        ('int', 'schoolmode'),
        ('virtual_int', 'virtualper'),
        ('hybrid_int', 'hybridper'),
    )
    for suffix, mode_col in mode_by_suffix:
        frame[f'{interaction}_{suffix}'] = attribute * frame[mode_col]

    controls = ['black', 'hispanic', 'lowincome', 'white']
    specifications = (
        # schoolmode regression
        ['schoolmode'] + controls + [f'{interaction}_int'],
        # virtualper / hybridper regression
        ['virtualper', 'hybridper'] + controls
        + [f'{interaction}_virtual_int', f'{interaction}_hybrid_int'],
    )

    fits = []
    for regressors in specifications:
        absorb = frame[['schoolcode', 'year', 'year_interaction']].astype('category')
        spec = AbsorbingLS(
            dependent=frame[outcome_var],
            exog=frame[regressors],
            absorb=absorb,
            weights=frame[weight],
        )
        fits.append(spec.fit(cov_type='clustered', clusters=frame['schoolcode']))

    return fits

## Race interactions 

### Mathpass + black

In [101]:
results = year_attribute_interaction_regression('mathpass', 'totaltested', 'districtcode', 'black')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:               mathpass   R-squared:                          0.9613
Estimator:               Absorbing LS   Adj. R-squared:                     0.9066
No. Observations:               24734   F-statistic:                        229.37
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0000
Time:                        21:52:46   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0249
                                        Varaibles Absorbed:              1.449e+04
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
schoolmode     2.5848     1.8936     1.3650     0.1722     -1.1265      6.2961
black         -0.493

### Mathpass + hispanic

In [103]:
results = year_attribute_interaction_regression('mathpass', 'totaltested', 'districtcode', 'hispanic')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:               mathpass   R-squared:                          0.9613
Estimator:               Absorbing LS   Adj. R-squared:                     0.9066
No. Observations:               24734   F-statistic:                        235.42
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0000
Time:                        21:52:57   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0253
                                        Varaibles Absorbed:              1.449e+04
                              Parameter Estimates                               
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
schoolmode       4.9048     2.2984     2.1340     0.0328      0.4000      9.4097
black       

### Mathpass + black/hispanic

In [65]:
results = year_attribute_interaction_regression('mathpass', 'totaltested', 'districtcode', 'black_hispanic')

In [66]:
print_results(results[0])

                         Absorbing LS Estimation Summary                          
Dep. Variable:               mathpass   R-squared:                          0.9613
Estimator:               Absorbing LS   Adj. R-squared:                     0.9066
No. Observations:               24734   F-statistic:                        236.76
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0000
Time:                        21:37:16   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0252
                                        Varaibles Absorbed:              1.449e+04
                                 Parameter Estimates                                  
                    Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------------
schoolmode             5.8388     2.6688     2.1878     0.0287      0.6081 

In [67]:
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:               mathpass   R-squared:                          0.9614
Estimator:               Absorbing LS   Adj. R-squared:                     0.9066
No. Observations:               24734   F-statistic:                        237.11
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0000
Time:                        21:37:17   Distribution:                      chi2(8)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0256
                                        Varaibles Absorbed:              1.449e+04
                                     Parameter Estimates                                      
                            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
----------------------------------------------------------------------------------------------
virtualper                     7.0664     3.0607   

### Elapass + black

In [104]:
results = year_attribute_interaction_regression('elapass', 'totaltested', 'districtcode', 'black')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:                elapass   R-squared:                          0.9748
Estimator:               Absorbing LS   Adj. R-squared:                     0.9085
No. Observations:               18554   F-statistic:                        109.75
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0000
Time:                        21:53:10   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0385
                                        Varaibles Absorbed:              1.344e+04
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
schoolmode     1.3697     1.7184     0.7971     0.4254     -1.9982      4.7377
black         -0.378

### Elapass + hispanic

In [105]:
results = year_attribute_interaction_regression('elapass', 'totaltested', 'districtcode', 'hispanic')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:                elapass   R-squared:                          0.9748
Estimator:               Absorbing LS   Adj. R-squared:                     0.9086
No. Observations:               18554   F-statistic:                        119.42
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0000
Time:                        21:53:21   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0391
                                        Varaibles Absorbed:              1.344e+04
                              Parameter Estimates                               
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
schoolmode       3.1747     1.7272     1.8380     0.0661     -0.2106      6.5600
black       

### Elapass + black/hispanic

In [106]:
results = year_attribute_interaction_regression('elapass', 'totaltested', 'districtcode', 'black_hispanic')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:                elapass   R-squared:                          0.9748
Estimator:               Absorbing LS   Adj. R-squared:                     0.9086
No. Observations:               18554   F-statistic:                        118.88
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0000
Time:                        21:53:32   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0390
                                        Varaibles Absorbed:              1.344e+04
                                 Parameter Estimates                                  
                    Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------------
schoolmode             3.7770     1.9718     1.9155     0.0554     -0.0877 

### Drop-out + black

In [107]:
results = year_attribute_interaction_regression('dropout', 'totalenrolled', 'districtcode', 'black')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:                dropout   R-squared:                          0.9381
Estimator:               Absorbing LS   Adj. R-squared:                     0.7559
No. Observations:               15568   F-statistic:                        29.656
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0000
Time:                        21:53:43   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0064
                                        Varaibles Absorbed:              1.161e+04
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
schoolmode    -3.7647     0.9940    -3.7876     0.0002     -5.7129     -1.8166
black          0.013

### Drop-out + hispanic

In [81]:
results = year_attribute_interaction_regression('dropout', 'totalenrolled', 'districtcode', 'hispanic')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:                dropout   R-squared:                          0.9380
Estimator:               Absorbing LS   Adj. R-squared:                     0.7555
No. Observations:               15568   F-statistic:                        26.050
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0002
Time:                        21:44:19   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0046
                                        Varaibles Absorbed:              1.161e+04
                              Parameter Estimates                               
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
schoolmode      -4.0564     1.1844    -3.4249     0.0006     -6.3778     -1.7350
black       

### Drop-out + black/hispanic

In [82]:
results = year_attribute_interaction_regression('dropout', 'totalenrolled', 'districtcode', 'black_hispanic')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:                dropout   R-squared:                          0.9381
Estimator:               Absorbing LS   Adj. R-squared:                     0.7559
No. Observations:               15568   F-statistic:                        39.397
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0000
Time:                        21:44:56   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0063
                                        Varaibles Absorbed:              1.161e+04
                                 Parameter Estimates                                  
                    Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------------
schoolmode            -2.5128     1.2739    -1.9724     0.0486     -5.0096 

## Income interactions

### Mathpass + lowincome

In [86]:
results = year_attribute_interaction_regression('mathpass', 'totaltested', 'districtcode', 'lowincome')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:               mathpass   R-squared:                          0.9613
Estimator:               Absorbing LS   Adj. R-squared:                     0.9066
No. Observations:               24734   F-statistic:                        229.46
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0000
Time:                        21:47:00   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0249
                                        Varaibles Absorbed:              1.449e+04
                               Parameter Estimates                               
               Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
---------------------------------------------------------------------------------
schoolmode        2.1836     2.4528     0.8902     0.3733     -2.6239      6.9910
black   

### Elapass + lowincome

In [87]:
results = year_attribute_interaction_regression('elapass', 'totaltested', 'districtcode', 'lowincome')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:                elapass   R-squared:                          0.9748
Estimator:               Absorbing LS   Adj. R-squared:                     0.9085
No. Observations:               18554   F-statistic:                        111.47
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0000
Time:                        21:47:08   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0385
                                        Varaibles Absorbed:              1.344e+04
                               Parameter Estimates                               
               Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
---------------------------------------------------------------------------------
schoolmode        1.9647     1.9627     1.0010     0.3168     -1.8821      5.8115
black   

### Drop-out + lowincome

In [83]:
results = year_attribute_interaction_regression('dropout', 'totalenrolled', 'districtcode', 'lowincome')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:                dropout   R-squared:                          0.9383
Estimator:               Absorbing LS   Adj. R-squared:                     0.7567
No. Observations:               15568   F-statistic:                        48.116
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0000
Time:                        21:45:18   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0095
                                        Varaibles Absorbed:              1.161e+04
                               Parameter Estimates                               
               Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
---------------------------------------------------------------------------------
schoolmode       -1.1642     1.1037    -1.0549     0.2915     -3.3274      0.9989
black   

## Charter interactions

### Mathpass + charter

In [88]:
results = year_attribute_interaction_regression('mathpass', 'totaltested', 'districtcode', 'charter')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:               mathpass   R-squared:                          0.9614
Estimator:               Absorbing LS   Adj. R-squared:                     0.9066
No. Observations:               24734   F-statistic:                        233.40
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0000
Time:                        21:47:42   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0256
                                        Varaibles Absorbed:              1.449e+04
                              Parameter Estimates                              
             Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
-------------------------------------------------------------------------------
schoolmode      3.5313     1.9007     1.8579     0.0632     -0.1941      7.2566
black          -

### Elapass + charter

In [89]:
results = year_attribute_interaction_regression('elapass', 'totaltested', 'districtcode', 'charter')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:                elapass   R-squared:                          0.9748
Estimator:               Absorbing LS   Adj. R-squared:                     0.9087
No. Observations:               18554   F-statistic:                        113.87
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0000
Time:                        21:47:54   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0402
                                        Varaibles Absorbed:              1.344e+04
                              Parameter Estimates                              
             Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
-------------------------------------------------------------------------------
schoolmode      2.3729     1.7341     1.3684     0.1712     -1.0258      5.7716
black          -

### Drop-out + charter

In [91]:
results = year_attribute_interaction_regression('dropout', 'totalenrolled', 'districtcode', 'charter')
print_results(results[0])
print_results(results[1])

                         Absorbing LS Estimation Summary                          
Dep. Variable:                dropout   R-squared:                          0.9380
Estimator:               Absorbing LS   Adj. R-squared:                     0.7555
No. Observations:               15568   F-statistic:                        24.312
Date:                Tue, Sep 26 2023   P-value (F-stat):                   0.0005
Time:                        21:48:36   Distribution:                      chi2(6)
Cov. Estimator:             clustered   R-squared (No Effects):             0.0046
                                        Varaibles Absorbed:              1.161e+04
                              Parameter Estimates                              
             Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
-------------------------------------------------------------------------------
schoolmode     -4.3228     1.3344    -3.2395     0.0012     -6.9381     -1.7074
black           

In [80]:
# Column headers for the two groups of regressions in the LaTeX table.
group_headers = [
    "\\textbf{\\substack{\\text{Dependent Variables} \\\\ \\text{w/ Schoolmode}}}",
    "\\textbf{\\substack{\\text{Dependent Variables} \\\\ \\text{w/o Schoolmode}}}"
]

# Row order of the variables in the LaTeX table.
order = [
    'schoolmode', 'virtualper', 'hybridper',
    'black_int', 'black_virtual_int', 'black_hybrid_int',
    'hispanic_int', 'hispanic_virtual_int', 'hispanic_hybrid_int',
    'lowincome_int', 'lowincome_virtual_int', 'lowincome_hybrid_int',
    'lowincome', 'black', 'hispanic', 'white', 'charter',
]

# Mapping from regression variable names to their display labels.
# This is the only definition: a shorter rename_dict that used to precede it
# was overwritten before being used, and all of its entries appear here.
# The percent signs are written as '\\%' so the string holds a literal
# backslash (LaTeX-escaped %) without relying on the invalid '\%' escape.
rename_dict = {
    'schoolmode': 'Learning Mode',
    'hispanic_int': 'Hispanic I',
    'black_int': 'Black I',
    'virtualper': 'Virtual \\%',
    'hybridper': 'Hybrid \\%',
    'charter': 'Charter',
    'hispanic_virtual_int': 'Hispanic Virtual I',
    'hispanic_hybrid_int': 'Hispanic Hybrid I',
    'black_virtual_int': 'Black Virtual I',
    'black_hybrid_int': 'Black Hybrid I',
    'lowincome': 'Low Income',
    'female': 'Female',
    'totaltested': 'Total Tested',
    'white': 'White',
    'black': 'Black',
    'hispanic': 'Hispanic',
    'retention': 'Retention',
    'classize_int': 'Class Size I',
    'lowincome_int': 'Low Income I',
    'lowincome_virtual_int': 'Low Income Virtual I',
    'lowincome_hybrid_int': 'Low Income Hybrid I'
}

def get_significance(coeff, std_error):
    """Return the star marker for a coefficient given its standard error.

    The absolute ratio ``|coeff / std_error|`` is compared against fixed
    cut-offs: at least 1.96 yields '***', at least 1.64 yields '**', at least
    1.28 yields '*', and anything smaller yields the empty string.

    NOTE(review): these cut-offs are looser than the common 1%/5%/10% star
    convention -- confirm that the table notes describe them accordingly.
    """
    ratio = abs(coeff / std_error)
    # Cut-offs are tried from strictest to loosest; the first one met wins.
    for cutoff, stars in ((1.96, '***'), (1.64, '**'), (1.28, '*')):
        if ratio >= cutoff:
            return stars
    return ''
    
def get_summary_dictionary_from_table(table, namespace=None):
    """Collect the pieces of each named regression result needed for a table.

    Parameters
    ----------
    table : iterable of str
        Names of fitted-result variables, one per table column.
    namespace : mapping, optional
        Where the names in ``table`` are looked up. Defaults to this module's
        globals, which is how the notebook cells call it; passing an explicit
        mapping avoids relying on notebook-global state.

    Returns
    -------
    dict
        Maps each name in ``table`` to a dict with the result's ``params``
        ('coefficients'), ``std_errors`` ('standard_errors'), ``nobs``
        ('observations') and a short column label ('name'). The label is the
        part of the name before the first underscore, with "DROPOUT"
        shortened to "DROP".

    Raises
    ------
    KeyError
        If a name in ``table`` is not present in the namespace.
    """
    if namespace is None:
        namespace = globals()

    summary = {}

    for regression_name in table:
        try:
            regression = namespace[regression_name]
        except KeyError:
            # Same exception type as the bare lookup, but it says what to do.
            raise KeyError(
                f"No regression result named {regression_name!r} is defined; "
                "run the cell that fits it before building the table."
            ) from None

        prefix = regression_name.split("_")[0]
        summary[regression_name] = {
            'coefficients': regression.params,
            'standard_errors': regression.std_errors,
            'observations': regression.nobs,
            'name': "DROP" if prefix == "DROPOUT" else prefix
        }

    return summary
    
def create_latex_table(summary, order, rename_dict, variable_grouping, headers_latex):
    """Assemble the body of a LaTeX regression table as one string.

    Parameters
    ----------
    summary : dict
        Maps a regression name to a dict holding 'coefficients',
        'standard_errors' and 'observations'. Each entry becomes one column,
        in the dict's iteration order.
    order : iterable of str
        Variable names in the order their rows should appear.
    rename_dict : dict
        Display labels for variables; a variable without an entry is printed
        under its raw name.
    variable_grouping : dict
        Maps a variable name to a group heading. The heading row is written at
        that variable's position in ``order`` even when the variable itself
        appears in none of the regressions.
    headers_latex : str
        Header rows, placed verbatim at the top of the output.

    Returns
    -------
    str
        ``headers_latex``, a ``\\midrule``, then for every variable present in
        at least one regression a coefficient row and a standard-error row,
        and finally an "Observations" row.
    """
    n_columns = len(summary)
    pieces = [headers_latex, "\n\\midrule\n"]

    for var in order:
        starts_group = var in variable_grouping
        if starts_group:
            pieces.append(
                f"\\textbf{{\\emph{{{variable_grouping[var]}}}}}"
                + "&  ref.  " * n_columns
                + " \\\\ \\addlinespace "
            )

        row_coeff = [rename_dict.get(var, var)]
        row_se = []
        present_somewhere = False

        for regression in summary.values():
            coefficients = regression.get('coefficients', {})
            standard_errors = regression.get('standard_errors', {})

            if var in coefficients and var in standard_errors:
                present_somewhere = True
                coeff = f"{coefficients[var]:.3f}"
                se = f"({standard_errors[var]:.3f})"
                significance = get_significance(coefficients[var], standard_errors[var])
                row_coeff.append(f"{coeff}\\sym{{{significance}}}")
                row_se.append(se)
            else:
                # A regression lacking this variable gets "ref." in its cell,
                # except on the row that opens a group, which is left blank.
                row_coeff.append(" " if starts_group else "ref.")
                row_se.append(" ")

        # Variables absent from every regression produce no rows at all.
        if not present_somewhere:
            continue

        pieces.append(" & ".join(row_coeff) + " \\\\\n")
        pieces.append(" & " + " & ".join(row_se) + " \\\\\n")
        pieces.append("\\addlinespace\n")

    # Backslashes are escaped explicitly: "\m" and "\ " are invalid escape
    # sequences that only work by accident and trigger warnings.
    pieces.append(" \\midrule \\midrule Observations")
    # One cell per regression, in column order.
    pieces.extend(f" & {regression['observations']}" for regression in summary.values())
    pieces.append(" \\\\ ")

    return "".join(pieces)

In [81]:
# Names of the six fitted results that form the columns: three *_NOPER
# results followed by three *_PER results.
table = ['MATH_NOINT_NOPER', 'ELA_NOINT_NOPER', 'DROPOUT_NOINT_NOPER', 'MATH_NOINT_PER', 'ELA_NOINT_PER', 'DROPOUT_NOINT_PER']
summary = get_summary_dictionary_from_table(table)

# First header row: each group heading spans three columns.
header_row1 = " ".join([f"&\\multicolumn{{3}}{{c}}{{{gh}}}" for gh in group_headers]) + " \\\\ \\cmidrule(lr){2-4} \\cmidrule(lr){5-7}"
# Second header row: the short label of each regression column.
header_row2 = " ".join([f"&\\multicolumn{{1}}{{c}}{{{summary[regression]['name']}}}" for regression in table]) + " \\\\"
headers_latex = header_row1 + "\n" + header_row2

latex_table_str = create_latex_table(summary, order, rename_dict, {"lowincome": "Controls"}, headers_latex)

# The context manager closes the file on exit, so no explicit close() follows.
with open("../tables/table_noint.tex", "w", encoding="utf-8") as f:
    f.write(latex_table_str)

In [82]:
# Names of the six fitted results that form the columns: three *_NOPER
# results followed by three *_PER results.
table = ['MATH_DISTRICTYEAR_NOPER', 'ELA_DISTRICTYEAR_NOPER', 'DROPOUT_DISTRICTYEAR_NOPER', 'MATH_DISTRICTYEAR_PER', 'ELA_DISTRICTYEAR_PER', 'DROPOUT_DISTRICTYEAR_PER']

summary = get_summary_dictionary_from_table(table)

# First header row: each group heading spans three columns.
header_row1 = " ".join([f"&\\multicolumn{{3}}{{c}}{{{gh}}}" for gh in group_headers]) + " \\\\ \\cmidrule(lr){2-4} \\cmidrule(lr){5-7}"
# Second header row: the short label of each regression column.
header_row2 = " ".join([f"&\\multicolumn{{1}}{{c}}{{{summary[regression]['name']}}}" for regression in table]) + " \\\\"
headers_latex = header_row1 + "\n" + header_row2

latex_table_str = create_latex_table(summary, order, rename_dict, {"lowincome": "Controls"}, headers_latex)

# The context manager closes the file on exit, so no explicit close() follows.
with open("../tables/table_districtyear.tex", "w", encoding="utf-8") as f:
    f.write(latex_table_str)


In [89]:
# Names of the six fitted results that form the columns: three *_NOPER
# results followed by three *_PER results.
table = ['MATH_STATEYEAR_NOPER', 'ELA_STATEYEAR_NOPER', 'DROPOUT_STATEYEAR_NOPER', 'MATH_STATEYEAR_PER', 'ELA_STATEYEAR_PER', 'DROPOUT_STATEYEAR_PER']

summary = get_summary_dictionary_from_table(table)

# First header row: each group heading spans three columns.
header_row1 = " ".join([f"&\\multicolumn{{3}}{{c}}{{{gh}}}" for gh in group_headers]) + " \\\\ \\cmidrule(lr){2-4} \\cmidrule(lr){5-7}"
# Second header row: the short label of each regression column.
header_row2 = " ".join([f"&\\multicolumn{{1}}{{c}}{{{summary[regression]['name']}}}" for regression in table]) + " \\\\"
headers_latex = header_row1 + "\n" + header_row2

latex_table_str = create_latex_table(summary, order, rename_dict, {"lowincome": "Controls"}, headers_latex)

# The context manager closes the file on exit, so no explicit close() follows.
with open("../tables/table_stateyear.tex", "w", encoding="utf-8") as f:
    f.write(latex_table_str)

In [84]:
# Names of the six fitted results that form the columns: three *_NOPER
# results followed by three *_PER results.
table = ['MATH_COUNTYYEAR_NOPER', 'ELA_COUNTYYEAR_NOPER', 'DROPOUT_COUNTYYEAR_NOPER', 'MATH_COUNTYYEAR_PER', 'ELA_COUNTYYEAR_PER', 'DROPOUT_COUNTYYEAR_PER']

summary = get_summary_dictionary_from_table(table)

# First header row: each group heading spans three columns.
header_row1 = " ".join([f"&\\multicolumn{{3}}{{c}}{{{gh}}}" for gh in group_headers]) + " \\\\ \\cmidrule(lr){2-4} \\cmidrule(lr){5-7}"
# Second header row: the short label of each regression column.
header_row2 = " ".join([f"&\\multicolumn{{1}}{{c}}{{{summary[regression]['name']}}}" for regression in table]) + " \\\\"
headers_latex = header_row1 + "\n" + header_row2

latex_table_str = create_latex_table(summary, order, rename_dict, {"lowincome": "Controls"}, headers_latex)

# The context manager closes the file on exit, so no explicit close() follows.
with open("../tables/table_countyyear.tex", "w", encoding="utf-8") as f:
    f.write(latex_table_str)

In [85]:
# Names of the six fitted results that form the columns: three *_NOPER
# results followed by three *_PER results.
table = ['MATH_DISTRICTYEAR_BLACK_NOPER', 'ELA_DISTRICTYEAR_BLACK_NOPER', 'DROPOUT_DISTRICTYEAR_BLACK_NOPER', 'MATH_DISTRICTYEAR_BLACK_PER', 'ELA_DISTRICTYEAR_BLACK_PER', 'DROPOUT_DISTRICTYEAR_BLACK_PER']

summary = get_summary_dictionary_from_table(table)

# First header row: each group heading spans three columns.
header_row1 = " ".join([f"&\\multicolumn{{3}}{{c}}{{{gh}}}" for gh in group_headers]) + " \\\\ \\cmidrule(lr){2-4} \\cmidrule(lr){5-7}"
# Second header row: the short label of each regression column.
header_row2 = " ".join([f"&\\multicolumn{{1}}{{c}}{{{summary[regression]['name']}}}" for regression in table]) + " \\\\"
headers_latex = header_row1 + "\n" + header_row2

# Group headings are written at the positions of 'lowincome' and 'black_int'
# in the row order.
latex_table_str = create_latex_table(summary, order, rename_dict, {"lowincome": "Controls", "black_int": "Interactions"}, headers_latex)

# The context manager closes the file on exit, so no explicit close() follows.
with open("../tables/table_districtyear_black.tex", "w", encoding="utf-8") as f:
    f.write(latex_table_str)

In [86]:
# Names of the six fitted results that form the columns: three *_NOPER
# results followed by three *_PER results.
table = ['MATH_DISTRICTYEAR_HISPANIC_NOPER', 'ELA_DISTRICTYEAR_HISPANIC_NOPER', 'DROPOUT_DISTRICTYEAR_HISPANIC_NOPER', 'MATH_DISTRICTYEAR_HISPANIC_PER', 'ELA_DISTRICTYEAR_HISPANIC_PER', 'DROPOUT_DISTRICTYEAR_HISPANIC_PER']

summary = get_summary_dictionary_from_table(table)

# First header row: each group heading spans three columns.
header_row1 = " ".join([f"&\\multicolumn{{3}}{{c}}{{{gh}}}" for gh in group_headers]) + " \\\\ \\cmidrule(lr){2-4} \\cmidrule(lr){5-7}"
# Second header row: the short label of each regression column.
header_row2 = " ".join([f"&\\multicolumn{{1}}{{c}}{{{summary[regression]['name']}}}" for regression in table]) + " \\\\"
headers_latex = header_row1 + "\n" + header_row2

# NOTE(review): the "Interactions" heading is keyed on 'black_int' although the
# result names here suggest Hispanic interactions -- confirm the heading lands
# where intended in the rendered table.
latex_table_str = create_latex_table(summary, order, rename_dict, {"lowincome": "Controls", "black_int": "Interactions"}, headers_latex)

# The context manager closes the file on exit, so no explicit close() follows.
with open("../tables/table_districtyear_hispanic.tex", "w", encoding="utf-8") as f:
    f.write(latex_table_str)

In [87]:
# Names of the six fitted results that form the columns: three *_NOPER
# results followed by three *_PER results.
table = ['MATH_DISTRICTYEAR_LOWINCOME_NOPER', 'ELA_DISTRICTYEAR_LOWINCOME_NOPER', 'DROPOUT_DISTRICTYEAR_LOWINCOME_NOPER', 'MATH_DISTRICTYEAR_LOWINCOME_PER', 'ELA_DISTRICTYEAR_LOWINCOME_PER', 'DROPOUT_DISTRICTYEAR_LOWINCOME_PER']

summary = get_summary_dictionary_from_table(table)

# First header row: each group heading spans three columns.
header_row1 = " ".join([f"&\\multicolumn{{3}}{{c}}{{{gh}}}" for gh in group_headers]) + " \\\\ \\cmidrule(lr){2-4} \\cmidrule(lr){5-7}"
# Second header row: the short label of each regression column.
header_row2 = " ".join([f"&\\multicolumn{{1}}{{c}}{{{summary[regression]['name']}}}" for regression in table]) + " \\\\"
headers_latex = header_row1 + "\n" + header_row2

# NOTE(review): the "Interactions" heading is keyed on 'black_int' although the
# result names here suggest low-income interactions -- confirm the heading
# lands where intended in the rendered table.
latex_table_str = create_latex_table(summary, order, rename_dict, {"lowincome": "Controls", "black_int": "Interactions"}, headers_latex)

# The context manager closes the file on exit, so no explicit close() follows.
with open("../tables/table_districtyear_lowincome.tex", "w", encoding="utf-8") as f:
    f.write(latex_table_str)

In [88]:
# Names of the six fitted results that form the columns: three *_NOPER
# results followed by three *_PER results.
table = ['MATH_DISTRICTYEAR_CHARTER_NOPER', 'ELA_DISTRICTYEAR_CHARTER_NOPER', 'DROPOUT_DISTRICTYEAR_CHARTER_NOPER', 'MATH_DISTRICTYEAR_CHARTER_PER', 'ELA_DISTRICTYEAR_CHARTER_PER', 'DROPOUT_DISTRICTYEAR_CHARTER_PER']

summary = get_summary_dictionary_from_table(table)

# First header row: each group heading spans three columns.
header_row1 = " ".join([f"&\\multicolumn{{3}}{{c}}{{{gh}}}" for gh in group_headers]) + " \\\\ \\cmidrule(lr){2-4} \\cmidrule(lr){5-7}"
# Second header row: the short label of each regression column.
header_row2 = " ".join([f"&\\multicolumn{{1}}{{c}}{{{summary[regression]['name']}}}" for regression in table]) + " \\\\"
headers_latex = header_row1 + "\n" + header_row2

# NOTE(review): the "Interactions" heading is keyed on 'black_int' although the
# result names here suggest a charter specification -- confirm the heading
# lands where intended in the rendered table.
latex_table_str = create_latex_table(summary, order, rename_dict, {"lowincome": "Controls", "black_int": "Interactions"}, headers_latex)

# The context manager closes the file on exit, so no explicit close() follows.
with open("../tables/table_districtyear_charter.tex", "w", encoding="utf-8") as f:
    f.write(latex_table_str)