In [3]:
!pip install linearmodels
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.stats.api as sms
from sklearn.metrics import mean_squared_error
from scipy import stats
from linearmodels.panel import PanelOLS, RandomEffects
from scipy.stats import chi2
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.stats.diagnostic import het_breuschpagan
from google.colab import files

Collecting linearmodels
  Downloading linearmodels-6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Collecting mypy-extensions>=0.4 (from linearmodels)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)
Collecting pyhdfe>=0.1 (from linearmodels)
  Downloading pyhdfe-0.2.0-py3-none-any.whl (19 kB)
Collecting formulaic>=1.0.0 (from linearmodels)
  Downloading formulaic-1.0.1-py3-none-any.whl (94 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.2/94.2 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting setuptools-scm[toml]<9.0.0,>=8.0.0 (from linearmodels)
  Downloading setuptools_scm-8.0.4-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.1/42.1 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting interface-meta>=1.2.0 (from formulaic>=1.0.0->linearmodels

In [4]:
def dfbetas(y, X, w, coef_index, ckk):
    """Leave-one-out influence of every observation on a single coefficient.

    A weighted PanelOLS model with entity and time effects is fitted on the
    full sample and then refitted once per row of ``X`` with that row removed.
    For each row the change in the selected coefficient is divided by
    ``sqrt(mse_removed * ckk)``, where ``mse_removed`` is the (unweighted)
    mean squared error of the refitted model.

    Parameters
    ----------
    y : dependent variable, indexed like ``X``.
    X : regressor DataFrame; one refit is run per index entry.
    w : observation weights, indexed like ``X``.
    coef_index : int or label
        Which coefficient to track. Integers are treated as positions in the
        fitted parameter vector; any other key is looked up by label.
    ckk : float
        Scaling term for the tracked coefficient (the callers pass the
        matching diagonal entry of ``(X'X)^-1``).

    Returns
    -------
    list
        One influence score per row of ``X``, in the order of ``X.index``.

    Notes
    -----
    The cost is one model fit per observation plus one for the full sample.
    """
    def _pick(params):
        # Integer keys on a label-indexed Series are deprecated as positional
        # lookups in pandas, so positions go through .iloc explicitly.
        if isinstance(coef_index, (int, np.integer)):
            return params.iloc[coef_index]
        return params[coef_index]

    fe = PanelOLS(y, X, weights=w, entity_effects=True, time_effects=True).fit()
    og_params = _pick(fe.params)
    # Named `influence` rather than `dfbetas` so the function name is not shadowed.
    influence = [None] * X.shape[0]

    for i in X.index:
        X_removed = X.drop(i)
        y_removed = y.drop(i)
        w_removed = w.drop(i)

        model_removed = PanelOLS(y_removed, X_removed, weights=w_removed,
                                 entity_effects=True, time_effects=True).fit()
        diff = og_params - _pick(model_removed.params)
        mse_removed = mean_squared_error(y_removed, model_removed.predict().fitted_values)
        # Store the score at the row's position so the output follows X's row order.
        loc = X.index.get_loc(i)
        influence[loc] = diff / np.sqrt(mse_removed * ckk)
    return influence

# Before Removal

# Math Proficiency Rate

In [10]:
# C_kk for the math-proficiency model: diagonal of (X'X)^-1 for a design that
# carries explicit mergecode / year dummies next to the regressors.
in_math = pd.read_csv('indiana_mathpass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_math, il, wi])[['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                     'virtualper', 'hybridper', 'black', 'hispanic',
                                     'white', 'lowincome']].reset_index(drop=True)
data['year'] = data['year'].astype(str)

# Demographic x learning-mode interactions (column order matters for ckk positions).
data['black_virtual'] = data['black'].mul(data['virtualper'])
data['black_hybrid'] = data['black'].mul(data['hybridper'])
data['hispanic_virtual'] = data['hispanic'].mul(data['virtualper'])
data['hispanic_hybrid'] = data['hispanic'].mul(data['hybridper'])
data['lowincome_virtual'] = data['lowincome'].mul(data['virtualper'])
data['lowincome_hybrid'] = data['lowincome'].mul(data['hybridper'])

dummy_variables = pd.get_dummies(data[['mergecode', 'year', 'state']], drop_first=True, dtype='int')
data = pd.concat([data, dummy_variables], axis=1).drop(columns=['year', 'mergecode', 'state'])

# State x learning-mode interactions are appended after the dummy columns.
data['in_virtual'] = data['state_indiana'].mul(data['virtualper'])
data['wi_virtual'] = data['state_wisconsin'].mul(data['virtualper'])
data['in_hybrid'] = data['state_indiana'].mul(data['hybridper'])
data['wi_hybrid'] = data['state_wisconsin'].mul(data['hybridper'])

# The state dummies themselves are left out of the design; only their interactions stay.
exog = sm.add_constant(
    data.drop(columns=['mathpass', 'totaltest', 'state_indiana', 'state_wisconsin'])
)

XtX_inv = np.linalg.inv(np.dot(exog.T, exog))
ckk = np.diagonal(XtX_inv)

# critical value: 2 / sqrt(number of observations)
critical_val = 2 / np.sqrt(len(data))

In [13]:
# Panel-form data for the math-proficiency model: (mergecode, year) MultiIndex,
# no explicit fixed-effect dummies.
in_math = pd.read_csv('indiana_mathpass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_math, il, wi])[['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                     'virtualper', 'hybridper', 'black', 'hispanic',
                                     'white', 'lowincome']].reset_index(drop=True)

# State dummies are only needed to build the state x learning-mode interactions.
dummy_variables = pd.get_dummies(data[['state']], drop_first=True, dtype='int')
data = pd.concat([data, dummy_variables], axis=1).drop(columns=['state'])
data['in_virtual'] = data['state_indiana'].mul(data['virtualper'])
data['wi_virtual'] = data['state_wisconsin'].mul(data['virtualper'])
data['in_hybrid'] = data['state_indiana'].mul(data['hybridper'])
data['wi_hybrid'] = data['state_wisconsin'].mul(data['hybridper'])

# NOTE(review): '%y' is the two-digit-year directive — assumes `year` is stored that way; confirm.
data['year'] = pd.to_datetime(data['year'], format='%y')
data = (
    data.drop(columns=['state_indiana', 'state_wisconsin'])
        .set_index(['mergecode', 'year'])
)

data['black_virtual'] = data['black'].mul(data['virtualper'])
data['black_hybrid'] = data['black'].mul(data['hybridper'])
data['hispanic_virtual'] = data['hispanic'].mul(data['virtualper'])
data['hispanic_hybrid'] = data['hispanic'].mul(data['hybridper'])
data['lowincome_virtual'] = data['lowincome'].mul(data['virtualper'])
data['lowincome_hybrid'] = data['lowincome'].mul(data['hybridper'])

# Regressors = everything except the weight (totaltest) and the outcome (mathpass), plus a constant.
exog = sm.add_constant(data.drop(columns=['totaltest', 'mathpass']))

In [None]:
# Influence scores for the main virtual-share coefficient (math proficiency); saved, then downloaded via Colab.
virtualper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=1, ckk=ckk[1])
df = pd.DataFrame({'mathpass_virtualper_inf': virtualper_inf})
df.to_csv('mathpass_virtualper_influence.csv')
files.download("./mathpass_virtualper_influence.csv")

In [None]:
# Influence scores for the main hybrid-share coefficient (math proficiency); saved, then downloaded via Colab.
hybridper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=2, ckk=ckk[2])
df = pd.DataFrame({'mathpass_hybridper_inf': hybridper_inf})
df.to_csv('mathpass_hybridper_influence.csv')
files.download("./mathpass_hybridper_influence.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Influence scores for the Indiana x virtual coefficient (math proficiency).
# NOTE(review): 1446 is a hard-coded position into ckk — presumably `in_virtual`
# in the dummy-expanded design; re-check whenever the sample changes.
in_virtualper_inf = dfbetas(data.mathpass, exog, data.totaltest, 7, ckk = ckk[1446])
df = pd.DataFrame()
df['mathpass_in_virtualper_inf'] = in_virtualper_inf
# The CSV must be written before it can be downloaded; this step was missing.
df.to_csv('mathpass_in_virtualper_influence.csv')
files.download(f"./mathpass_in_virtualper_influence.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Influence scores for the Wisconsin x virtual coefficient (math proficiency).
# NOTE(review): ckk position 1447 is hard-coded — presumably `wi_virtual`; verify against the design.
wi_virtualper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=8, ckk=ckk[1447])
df = pd.DataFrame({'mathpass_wi_virtualper_inf': wi_virtualper_inf})
df.to_csv('mathpass_wi_virtualper_influence.csv')
files.download("./mathpass_wi_virtualper_influence.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Influence scores for the Indiana x hybrid coefficient (math proficiency).
# NOTE(review): ckk position 1448 is hard-coded — presumably `in_hybrid`; verify against the design.
in_hybridper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=9, ckk=ckk[1448])
df = pd.DataFrame({'mathpass_in_hybridper_inf': in_hybridper_inf})
df.to_csv('mathpass_in_hybridper_influence.csv')
files.download("./mathpass_in_hybridper_influence.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Influence scores for the Wisconsin x hybrid coefficient (math proficiency).
# NOTE(review): ckk position 1449 is hard-coded — presumably `wi_hybrid`; verify against the design.
wi_hybridper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=10, ckk=ckk[1449])
df = pd.DataFrame({'mathpass_wi_hybridper_inf': wi_hybridper_inf})
df.to_csv('mathpass_wi_hybridper_influence.csv')
files.download("./mathpass_wi_hybridper_influence.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Influence scores for the black x virtual coefficient (math proficiency); saved, then downloaded via Colab.
black_virtualper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=11, ckk=ckk[7])
df = pd.DataFrame({'mathpass_black_virtualper_inf': black_virtualper_inf})
df.to_csv('mathpass_black_virtualper_influence.csv')
files.download("./mathpass_black_virtualper_influence.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Influence scores for the black x hybrid coefficient (math proficiency); saved, then downloaded via Colab.
black_hybridper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=12, ckk=ckk[8])
df = pd.DataFrame({'mathpass_black_hybridper_inf': black_hybridper_inf})
df.to_csv('mathpass_black_hybridper_influence.csv')
files.download("./mathpass_black_hybridper_influence.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Influence scores for the hispanic x virtual coefficient (math proficiency); saved, then downloaded via Colab.
hispanic_virtualper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=13, ckk=ckk[9])
df = pd.DataFrame({'mathpass_hispanic_virtualper_inf': hispanic_virtualper_inf})
df.to_csv('mathpass_hispanic_virtualper_influence.csv')
files.download("./mathpass_hispanic_virtualper_influence.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Influence scores for the hispanic x hybrid coefficient (math proficiency); saved, then downloaded via Colab.
hispanic_hybridper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=14, ckk=ckk[10])
df = pd.DataFrame({'mathpass_hispanic_hybridper_inf': hispanic_hybridper_inf})
df.to_csv('mathpass_hispanic_hybridper_influence.csv')
files.download("./mathpass_hispanic_hybridper_influence.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Influence scores for the low-income x virtual coefficient (labelled `ed`), math proficiency.
ed_virtualper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=15, ckk=ckk[11])
df = pd.DataFrame({'mathpass_ed_virtualper_inf': ed_virtualper_inf})
df.to_csv('mathpass_ed_virtualper_influence.csv')
files.download("./mathpass_ed_virtualper_influence.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Influence scores for the low-income x hybrid coefficient (labelled `ed`), math proficiency.
ed_hybridper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=16, ckk=ckk[12])
df = pd.DataFrame({'mathpass_ed_hybridper_inf': ed_hybridper_inf})
df.to_csv('mathpass_ed_hybridper_influence.csv')
files.download("./mathpass_ed_hybridper_influence.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# ELA Proficiency Rate

In [None]:
# C_kk for the ELA-proficiency model: diagonal of (X'X)^-1 for a design that
# carries explicit mergecode / year dummies next to the regressors.
in_ela = pd.read_csv('indiana_elapass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_ela, il, wi])[['year', 'mergecode', 'state', 'totaltest', 'elapass',
                                    'virtualper', 'hybridper', 'black', 'hispanic',
                                    'white', 'lowincome']].reset_index(drop=True)
data['year'] = data['year'].astype(str)

# Demographic x learning-mode interactions (column order matters for ckk positions).
data['black_virtual'] = data['black'].mul(data['virtualper'])
data['black_hybrid'] = data['black'].mul(data['hybridper'])
data['hispanic_virtual'] = data['hispanic'].mul(data['virtualper'])
data['hispanic_hybrid'] = data['hispanic'].mul(data['hybridper'])
data['lowincome_virtual'] = data['lowincome'].mul(data['virtualper'])
data['lowincome_hybrid'] = data['lowincome'].mul(data['hybridper'])

dummy_variables = pd.get_dummies(data[['mergecode', 'year', 'state']], drop_first=True, dtype='int')
data = pd.concat([data, dummy_variables], axis=1).drop(columns=['year', 'mergecode', 'state'])

# State x learning-mode interactions are appended after the dummy columns.
data['in_virtual'] = data['state_indiana'].mul(data['virtualper'])
data['wi_virtual'] = data['state_wisconsin'].mul(data['virtualper'])
data['in_hybrid'] = data['state_indiana'].mul(data['hybridper'])
data['wi_hybrid'] = data['state_wisconsin'].mul(data['hybridper'])

# The state dummies themselves are left out of the design; only their interactions stay.
exog = sm.add_constant(
    data.drop(columns=['elapass', 'totaltest', 'state_indiana', 'state_wisconsin'])
)

XtX_inv = np.linalg.inv(np.dot(exog.T, exog))
ckk = np.diagonal(XtX_inv)

# critical value: 2 / sqrt(number of observations); shown as the cell output
critical_val = 2 / np.sqrt(len(data))
critical_val

In [None]:
# Panel-form data for the ELA-proficiency model: (mergecode, year) MultiIndex,
# no explicit fixed-effect dummies.
# NOTE(review): this reads 'new_indiana_elapass.csv' while the C_kk cell reads
# 'indiana_elapass.csv' — confirm both cells are meant to use different files.
in_ela = pd.read_csv('new_indiana_elapass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_ela, il, wi])[['year', 'mergecode', 'state', 'totaltest', 'elapass',
                                    'virtualper', 'hybridper', 'black', 'hispanic',
                                    'white', 'lowincome']]
# State dummies are only needed to build the state x learning-mode interactions.
dummy_variables = pd.get_dummies(data[['state']], drop_first=True, dtype='int')
data = pd.concat([data, dummy_variables], axis=1).drop(columns=['state'])
data['in_virtual'] = data['state_indiana'].mul(data['virtualper'])
data['wi_virtual'] = data['state_wisconsin'].mul(data['virtualper'])
data['in_hybrid'] = data['state_indiana'].mul(data['hybridper'])
data['wi_hybrid'] = data['state_wisconsin'].mul(data['hybridper'])

# NOTE(review): '%y' is the two-digit-year directive — assumes `year` is stored that way; confirm.
data['year'] = pd.to_datetime(data['year'], format='%y')
data = (
    data.drop(columns=['state_indiana', 'state_wisconsin'])
        .set_index(['mergecode', 'year'])
)

data['black_virtual'] = data['black'].mul(data['virtualper'])
data['black_hybrid'] = data['black'].mul(data['hybridper'])
data['hispanic_virtual'] = data['hispanic'].mul(data['virtualper'])
data['hispanic_hybrid'] = data['hispanic'].mul(data['hybridper'])
data['lowincome_virtual'] = data['lowincome'].mul(data['virtualper'])
data['lowincome_hybrid'] = data['lowincome'].mul(data['hybridper'])

# Regressors = everything except the weight (totaltest) and the outcome (elapass), plus a constant.
exog = sm.add_constant(data.drop(columns=['totaltest', 'elapass']))

In [None]:
# Influence scores for the main virtual-share coefficient (ELA proficiency); saved, then downloaded via Colab.
virtualper_inf = dfbetas(data['elapass'], exog, data['totaltest'], coef_index=1, ckk=ckk[1])
df = pd.DataFrame({'elapass_virtualper_inf': virtualper_inf})
df.to_csv('elapass_virtualper_influence.csv')
files.download("./elapass_virtualper_influence.csv")

In [None]:
# Influence scores for the main hybrid-share coefficient (ELA proficiency); saved, then downloaded via Colab.
hybridper_inf = dfbetas(data['elapass'], exog, data['totaltest'], coef_index=2, ckk=ckk[2])
df = pd.DataFrame({'elapass_hybridper_inf': hybridper_inf})
df.to_csv('elapass_hybridper_influence.csv')
files.download("./elapass_hybridper_influence.csv")

In [None]:
# Influence scores for the Indiana x virtual coefficient (ELA proficiency).
# NOTE(review): 1446 is a hard-coded position into ckk — presumably `in_virtual`
# in the dummy-expanded design; re-check whenever the sample changes.
in_virtualper_inf = dfbetas(data.elapass, exog, data.totaltest, 7, ckk = ckk[1446])
df = pd.DataFrame()
df['elapass_in_virtualper_inf'] = in_virtualper_inf
df.to_csv('elapass_in_virtualper_influence.csv')
# `files` comes from the notebook's import cell; the duplicate mid-notebook import was removed.
files.download(f"./elapass_in_virtualper_influence.csv")

In [None]:
# Influence scores for the Wisconsin x virtual coefficient (ELA proficiency).
# NOTE(review): ckk position 1447 is hard-coded — presumably `wi_virtual`; verify against the design.
wi_virtualper_inf = dfbetas(data['elapass'], exog, data['totaltest'], coef_index=8, ckk=ckk[1447])
df = pd.DataFrame({'elapass_wi_virtualper_inf': wi_virtualper_inf})
df.to_csv('elapass_wi_virtualper_influence.csv')
files.download("./elapass_wi_virtualper_influence.csv")

In [None]:
# Influence scores for the Indiana x hybrid coefficient (ELA proficiency).
# NOTE(review): ckk position 1448 is hard-coded — presumably `in_hybrid`; verify against the design.
in_hybridper_inf = dfbetas(data['elapass'], exog, data['totaltest'], coef_index=9, ckk=ckk[1448])
df = pd.DataFrame({'elapass_in_hybridper_inf': in_hybridper_inf})
df.to_csv('elapass_in_hybridper_influence.csv')
files.download("./elapass_in_hybridper_influence.csv")

In [None]:
# Influence scores for the Wisconsin x hybrid coefficient (ELA proficiency).
# NOTE(review): ckk position 1449 is hard-coded — presumably `wi_hybrid`; verify against the design.
# NOTE(review): the column header below does not follow the 'elapass_<term>_inf' pattern of sibling cells.
wi_hybridper_inf = dfbetas(data['elapass'], exog, data['totaltest'], coef_index=10, ckk=ckk[1449])
df = pd.DataFrame({'wi_elapass_hybridper_inf': wi_hybridper_inf})
df.to_csv('elapass_wi_hybridper_influence.csv')
files.download("./elapass_wi_hybridper_influence.csv")

In [None]:
# Influence scores for the black x virtual coefficient (ELA proficiency); saved, then downloaded via Colab.
black_virtualper_inf = dfbetas(data['elapass'], exog, data['totaltest'], coef_index=11, ckk=ckk[7])
df = pd.DataFrame({'elapass_black_virtualper_inf': black_virtualper_inf})
df.to_csv('elapass_black_virtualper_influence.csv')
files.download("./elapass_black_virtualper_influence.csv")

In [None]:
# Influence scores for the black x hybrid coefficient (ELA proficiency).
# NOTE(review): the column header below does not follow the 'elapass_<term>_inf' pattern of sibling cells.
black_hybridper_inf = dfbetas(data.elapass, exog, data.totaltest, 12, ckk = ckk[8])
df = pd.DataFrame()
df['black_elapass_hybridper_inf'] = black_hybridper_inf
df.to_csv('elapass_black_hybridper_influence.csv')
# `files` comes from the notebook's import cell; the duplicate mid-notebook import was removed.
files.download(f"./elapass_black_hybridper_influence.csv")

In [None]:
# Influence scores for the hispanic x virtual coefficient (ELA proficiency); saved, then downloaded via Colab.
hispanic_virtualper_inf = dfbetas(data['elapass'], exog, data['totaltest'], coef_index=13, ckk=ckk[9])
df = pd.DataFrame({'elapass_hispanic_virtualper_inf': hispanic_virtualper_inf})
df.to_csv('elapass_hispanic_virtualper_influence.csv')
files.download("./elapass_hispanic_virtualper_influence.csv")

In [None]:
# Influence scores for the hispanic x hybrid coefficient (ELA proficiency).
# NOTE(review): the column header below does not follow the 'elapass_<term>_inf' pattern of sibling cells.
hispanic_hybridper_inf = dfbetas(data['elapass'], exog, data['totaltest'], coef_index=14, ckk=ckk[10])
df = pd.DataFrame({'hispanic_elapass_hybridper_inf': hispanic_hybridper_inf})
df.to_csv('elapass_hispanic_hybridper_influence.csv')
files.download("./elapass_hispanic_hybridper_influence.csv")

In [None]:
# Influence scores for the low-income x virtual coefficient (labelled `ed`), ELA proficiency.
ed_virtualper_inf = dfbetas(data['elapass'], exog, data['totaltest'], coef_index=15, ckk=ckk[11])
df = pd.DataFrame({'elapass_ed_virtualper_inf': ed_virtualper_inf})
df.to_csv('elapass_ed_virtualper_influence.csv')
files.download("./elapass_ed_virtualper_influence.csv")

# Dropout Rate

In [18]:
# C_kk for the dropout-rate model: diagonal of (X'X)^-1 for a design that
# carries explicit mergecode / year dummies next to the regressors.
az = pd.read_csv('arizona_dropout.csv')
co = pd.read_csv('colorado_dropout.csv')
ga = pd.read_csv('georgia_dropout.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([az, co, ga, wi])[['year', 'mergecode', 'state', 'totalenroll', 'droprate',
                                    'virtualper', 'hybridper', 'black', 'hispanic',
                                    'white', 'lowincome']].reset_index(drop=True)

data['year'] = data['year'].astype(str)

# Demographic x learning-mode interactions (column order matters for ckk positions).
data['black_virtual'] = data['black'].mul(data['virtualper'])
data['black_hybrid'] = data['black'].mul(data['hybridper'])
data['hispanic_virtual'] = data['hispanic'].mul(data['virtualper'])
data['hispanic_hybrid'] = data['hispanic'].mul(data['hybridper'])
data['lowincome_virtual'] = data['lowincome'].mul(data['virtualper'])
data['lowincome_hybrid'] = data['lowincome'].mul(data['hybridper'])

dummy_variables = pd.get_dummies(data[['mergecode', 'year', 'state']], drop_first=True, dtype='int')
data = pd.concat([data, dummy_variables], axis=1).drop(columns=['year', 'mergecode', 'state'])

# State x learning-mode interactions are appended after the dummy columns.
data['co_virtual'] = data['state_colorado'].mul(data['virtualper'])
data['ga_virtual'] = data['state_georgia'].mul(data['virtualper'])
data['wi_virtual'] = data['state_wisconsin'].mul(data['virtualper'])
data['co_hybrid'] = data['state_colorado'].mul(data['hybridper'])
data['ga_hybrid'] = data['state_georgia'].mul(data['hybridper'])
data['wi_hybrid'] = data['state_wisconsin'].mul(data['hybridper'])

# The state dummies themselves are left out of the design; only their interactions stay.
exog = sm.add_constant(
    data.drop(columns=['droprate', 'totalenroll', 'state_colorado', 'state_georgia', 'state_wisconsin'])
)

XtX_inv = np.linalg.inv(np.dot(exog.T, exog))
ckk = np.diagonal(XtX_inv)

# critical value: 2 / sqrt(number of observations)
critical_val = 2 / np.sqrt(len(data))

In [None]:
# Panel-form data for the dropout-rate model: (mergecode, year) MultiIndex,
# no explicit fixed-effect dummies.
az = pd.read_csv('arizona_dropout.csv')
co = pd.read_csv('colorado_dropout.csv')
ga = pd.read_csv('georgia_dropout.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([az, co, ga, wi]).loc[:, ['year', 'mergecode', 'state', 'totalenroll', 'droprate',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']].reset_index().drop(columns = 'index')

# State dummies are only needed to build the state x learning-mode interactions.
dummy_variables = pd.get_dummies(data[['state']], drop_first = True, dtype = 'int')
data = pd.concat([data, dummy_variables], axis=1).drop(columns = ['state'])
data['co_virtual'] = data['state_colorado'] * data['virtualper']
# The Georgia x virtual term was missing here although the C_kk design includes it
# and the influence cells address coefficient positions 7-18, which require all
# six state interactions (without it, position 18 is out of range).
data['ga_virtual'] = data['state_georgia'] * data['virtualper']
data['wi_virtual'] = data['state_wisconsin'] * data['virtualper']
data['co_hybrid'] = data['state_colorado'] * data['hybridper']
data['ga_hybrid'] = data['state_georgia'] * data['hybridper']
data['wi_hybrid'] = data['state_wisconsin'] * data['hybridper']

# NOTE(review): '%y' is the two-digit-year directive — assumes `year` is stored that way; confirm.
data['year'] = pd.to_datetime(data['year'], format='%y')
data = data.drop(columns = ['state_colorado', 'state_georgia',
                            'state_wisconsin']).set_index(['mergecode', 'year'])

data['black_virtual'] = data['black'] * data['virtualper']
data['black_hybrid'] = data['black'] * data['hybridper']
data['hispanic_virtual'] = data['hispanic'] * data['virtualper']
data['hispanic_hybrid'] = data['hispanic'] * data['hybridper']
data['lowincome_virtual'] = data['lowincome'] * data['virtualper']
data['lowincome_hybrid'] = data['lowincome'] * data['hybridper']

# Regressors = everything except the outcome (droprate) and the weight (totalenroll), plus a constant.
exog = sm.add_constant(data.drop(columns = ['droprate', 'totalenroll']))

In [None]:
# Influence scores for the main virtual-share coefficient (dropout rate); saved, then downloaded via Colab.
virtualper_inf = dfbetas(data.droprate, exog, data.totalenroll, 1, ckk = ckk[1])
df = pd.DataFrame()
df['drop_virtual'] = virtualper_inf
df.to_csv('drop_virtual.csv')
# `files` comes from the notebook's import cell; the duplicate mid-notebook import was removed.
files.download(f"./drop_virtual.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Influence scores for the main hybrid-share coefficient (dropout rate); saved, then downloaded via Colab.
hybridper_inf = dfbetas(data['droprate'], exog, data['totalenroll'], coef_index=2, ckk=ckk[2])
df = pd.DataFrame({'drop_hybrid': hybridper_inf})
df.to_csv('drop_hybrid.csv')
files.download("./drop_hybrid.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Influence scores for the Colorado x virtual coefficient (dropout rate).
# The dropout frame has no `elapass` / `totaltest` columns; the outcome is
# `droprate` and the weight is `totalenroll`, as in the other dropout cells.
# NOTE(review): coefficient position 7 and ckk position 1413 are hard-coded —
# intended to address `co_virtual`; verify against the design column order.
co_virtualper_inf = dfbetas(data.droprate, exog, data.totalenroll, 7, ckk = ckk[1413])
df = pd.DataFrame()
df['drop_co_virtual'] = co_virtualper_inf
df.to_csv('drop_co_virtual.csv')
files.download(f"./drop_co_virtual.csv")

In [None]:
# Influence scores for the Georgia x virtual coefficient (dropout rate).
# The dropout frame has no `elapass` / `totaltest` columns; the outcome is
# `droprate` and the weight is `totalenroll`, as in the other dropout cells.
# NOTE(review): coefficient position 8 and ckk position 1414 are hard-coded —
# intended to address `ga_virtual`; verify that the panel design actually
# contains that column at this position.
ga_virtualper_inf = dfbetas(data.droprate, exog, data.totalenroll, 8, ckk = ckk[1414])
df = pd.DataFrame()
df['drop_ga_virtual'] = ga_virtualper_inf
df.to_csv('drop_ga_virtual.csv')
files.download(f"./drop_ga_virtual.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Influence scores for the Wisconsin x virtual coefficient (dropout rate).
# The dropout frame has no `elapass` / `totaltest` columns; the outcome is
# `droprate` and the weight is `totalenroll`, as in the other dropout cells.
# NOTE(review): coefficient position 9 and ckk position 1415 are hard-coded —
# intended to address `wi_virtual`; verify against the design column order.
wi_virtualper_inf = dfbetas(data.droprate, exog, data.totalenroll, 9, ckk = ckk[1415])
df = pd.DataFrame()
df['drop_wi_virtual'] = wi_virtualper_inf
df.to_csv('drop_wi_virtual.csv')
files.download(f"./drop_wi_virtual.csv")

In [None]:
# Influence scores intended for the Colorado x hybrid coefficient (dropout rate).
# NOTE(review): coefficient position 10 / ckk position 1416 are hard-coded; verify against the design column order.
co_hybridper_inf = dfbetas(data['droprate'], exog, data['totalenroll'], coef_index=10, ckk=ckk[1416])
df = pd.DataFrame({'drop_co_hybrid': co_hybridper_inf})
df.to_csv('drop_co_hybrid.csv')
files.download("./drop_co_hybrid.csv")

In [None]:
# Influence scores intended for the Georgia x hybrid coefficient (dropout rate).
# NOTE(review): coefficient position 11 / ckk position 1417 are hard-coded; verify against the design column order.
ga_hybridper_inf = dfbetas(data['droprate'], exog, data['totalenroll'], coef_index=11, ckk=ckk[1417])
df = pd.DataFrame({'drop_ga_hybrid': ga_hybridper_inf})
df.to_csv('drop_ga_hybrid.csv')
files.download("./drop_ga_hybrid.csv")

In [None]:
# Influence scores intended for the Wisconsin x hybrid coefficient (dropout rate).
# NOTE(review): coefficient position 12 / ckk position 1418 are hard-coded; verify against the design column order.
wi_hybridper_inf = dfbetas(data['droprate'], exog, data['totalenroll'], coef_index=12, ckk=ckk[1418])
df = pd.DataFrame({'drop_wi_hybrid': wi_hybridper_inf})
df.to_csv('drop_wi_hybrid.csv')
files.download("./drop_wi_hybrid.csv")

In [None]:
# Influence scores intended for the black x virtual coefficient (dropout rate).
# NOTE(review): coefficient position 13 is hard-coded; verify against the panel design column order.
black_virtualper_inf = dfbetas(data['droprate'], exog, data['totalenroll'], coef_index=13, ckk=ckk[7])
df = pd.DataFrame({'drop_black_virtual': black_virtualper_inf})
df.to_csv('drop_black_virtual.csv')
files.download("./drop_black_virtual.csv")

In [None]:
# Influence scores intended for the black x hybrid coefficient (dropout rate).
# NOTE(review): coefficient position 14 is hard-coded; verify against the panel design column order.
black_hybridper_inf = dfbetas(data['droprate'], exog, data['totalenroll'], coef_index=14, ckk=ckk[8])
df = pd.DataFrame({'drop_black_hybrid': black_hybridper_inf})
df.to_csv('drop_black_hybrid.csv')
files.download("./drop_black_hybrid.csv")

In [None]:
# Influence scores intended for the hispanic x virtual coefficient (dropout rate).
# NOTE(review): coefficient position 15 is hard-coded; verify against the panel design column order.
his_virtualper_inf = dfbetas(data['droprate'], exog, data['totalenroll'], coef_index=15, ckk=ckk[9])
df = pd.DataFrame({'drop_his_virtual': his_virtualper_inf})
df.to_csv('drop_his_virtual.csv')
files.download("./drop_his_virtual.csv")

In [None]:
# Influence scores intended for the hispanic x hybrid coefficient (dropout rate).
# NOTE(review): coefficient position 16 is hard-coded; verify against the panel design column order.
his_hybridper_inf = dfbetas(data['droprate'], exog, data['totalenroll'], coef_index=16, ckk=ckk[10])
df = pd.DataFrame({'drop_his_hybrid': his_hybridper_inf})
df.to_csv('drop_his_hybrid.csv')
files.download("./drop_his_hybrid.csv")

In [None]:
# Influence scores intended for the low-income x virtual coefficient (labelled `ed`), dropout rate.
# NOTE(review): coefficient position 17 is hard-coded; verify against the panel design column order.
ed_virtualper_inf = dfbetas(data['droprate'], exog, data['totalenroll'], coef_index=17, ckk=ckk[11])
df = pd.DataFrame({'drop_ed_virtual': ed_virtualper_inf})
df.to_csv('drop_ed_virtual.csv')
files.download("./drop_ed_virtual.csv")

In [None]:
# Influence scores intended for the low-income x hybrid coefficient (labelled `ed`), dropout rate.
# NOTE(review): coefficient position 18 is hard-coded; verify against the panel design column order.
ed_hybridper_inf = dfbetas(data['droprate'], exog, data['totalenroll'], coef_index=18, ckk=ckk[12])
df = pd.DataFrame({'drop_ed_hybrid': ed_hybridper_inf})
df.to_csv('drop_ed_hybrid.csv')
files.download("./drop_ed_hybrid.csv")

# After Removal

# Math Proficiency Rate

In [21]:
# District codes to exclude from the math-proficiency sample.
drop = pd.read_csv('math_drop.csv').math_drop.to_list()

# C_kk on the reduced sample: diagonal of (X'X)^-1 for a design that carries
# explicit mergecode / year dummies next to the regressors.
in_math = pd.read_csv('indiana_mathpass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_math, il, wi])[['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                     'virtualper', 'hybridper', 'black', 'hispanic',
                                     'white', 'lowincome']].reset_index(drop=True)

# Keep only rows whose district is not on the exclusion list (index is not reset afterwards).
data = data.loc[~data['mergecode'].isin(drop)]
data['year'] = data['year'].astype(str)

# Demographic x learning-mode interactions (column order matters for ckk positions).
data['black_virtual'] = data['black'].mul(data['virtualper'])
data['black_hybrid'] = data['black'].mul(data['hybridper'])
data['hispanic_virtual'] = data['hispanic'].mul(data['virtualper'])
data['hispanic_hybrid'] = data['hispanic'].mul(data['hybridper'])
data['lowincome_virtual'] = data['lowincome'].mul(data['virtualper'])
data['lowincome_hybrid'] = data['lowincome'].mul(data['hybridper'])

dummy_variables = pd.get_dummies(data[['mergecode', 'year', 'state']], drop_first=True, dtype='int')
data = pd.concat([data, dummy_variables], axis=1).drop(columns=['year', 'mergecode', 'state'])

# State x learning-mode interactions are appended after the dummy columns.
data['in_virtual'] = data['state_indiana'].mul(data['virtualper'])
data['wi_virtual'] = data['state_wisconsin'].mul(data['virtualper'])
data['in_hybrid'] = data['state_indiana'].mul(data['hybridper'])
data['wi_hybrid'] = data['state_wisconsin'].mul(data['hybridper'])

# The state dummies themselves are left out of the design; only their interactions stay.
exog = sm.add_constant(
    data.drop(columns=['mathpass', 'totaltest', 'state_indiana', 'state_wisconsin'])
)

XtX_inv = np.linalg.inv(np.dot(exog.T, exog))
ckk = np.diagonal(XtX_inv)

# critical value: 2 / sqrt(number of observations)
critical_val = 2 / np.sqrt(len(data))

In [None]:
# Panel-form data for the math-proficiency model on the reduced sample
# (relies on `drop`, the exclusion list loaded in the C_kk cell).
in_math = pd.read_csv('indiana_mathpass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_math, il, wi])[['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                     'virtualper', 'hybridper', 'black', 'hispanic',
                                     'white', 'lowincome']].reset_index(drop=True)

# Keep only rows whose district is not on the exclusion list.
data = data.loc[~data['mergecode'].isin(drop)]

# State dummies are only needed to build the state x learning-mode interactions.
dummy_variables = pd.get_dummies(data[['state']], drop_first=True, dtype='int')
data = pd.concat([data, dummy_variables], axis=1).drop(columns=['state'])
data['in_virtual'] = data['state_indiana'].mul(data['virtualper'])
data['wi_virtual'] = data['state_wisconsin'].mul(data['virtualper'])
data['in_hybrid'] = data['state_indiana'].mul(data['hybridper'])
data['wi_hybrid'] = data['state_wisconsin'].mul(data['hybridper'])

# NOTE(review): '%y' is the two-digit-year directive — assumes `year` is stored that way; confirm.
data['year'] = pd.to_datetime(data['year'], format='%y')
data = (
    data.drop(columns=['state_indiana', 'state_wisconsin'])
        .set_index(['mergecode', 'year'])
)

data['black_virtual'] = data['black'].mul(data['virtualper'])
data['black_hybrid'] = data['black'].mul(data['hybridper'])
data['hispanic_virtual'] = data['hispanic'].mul(data['virtualper'])
data['hispanic_hybrid'] = data['hispanic'].mul(data['hybridper'])
data['lowincome_virtual'] = data['lowincome'].mul(data['virtualper'])
data['lowincome_hybrid'] = data['lowincome'].mul(data['hybridper'])

# Regressors = everything except the weight (totaltest) and the outcome (mathpass), plus a constant.
exog = sm.add_constant(data.drop(columns=['totaltest', 'mathpass']))

In [None]:
# Reduced sample: influence scores for the main virtual-share coefficient (math proficiency).
virtualper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=1, ckk=ckk[1])
df = pd.DataFrame({'mathpass_virtualper_inf': virtualper_inf})
df.to_csv('mathpass_virtualper_influence.csv')
files.download("./mathpass_virtualper_influence.csv")

In [None]:
# Reduced sample: influence scores for the main hybrid-share coefficient (math proficiency).
hybridper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=2, ckk=ckk[2])
df = pd.DataFrame({'mathpass_hybridper_inf': hybridper_inf})
df.to_csv('mathpass_hybridper_influence.csv')
files.download("./mathpass_hybridper_influence.csv")

In [None]:
# Reduced sample: influence scores for the Indiana x virtual coefficient (math proficiency).
# NOTE(review): ckk position 1283 is hard-coded — presumably `in_virtual`; verify against the design.
in_virtualper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=7, ckk=ckk[1283])
df = pd.DataFrame({'mathpass_in_virtualper_inf': in_virtualper_inf})
df.to_csv('mathpass_in_virtualper_influence.csv')
files.download("./mathpass_in_virtualper_influence.csv")

In [None]:
# Reduced sample: influence scores for the Wisconsin x virtual coefficient (math proficiency).
# NOTE(review): ckk position 1284 is hard-coded — presumably `wi_virtual`; verify against the design.
wi_virtualper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=8, ckk=ckk[1284])
df = pd.DataFrame({'mathpass_wi_virtualper_inf': wi_virtualper_inf})
df.to_csv('mathpass_wi_virtualper_influence.csv')
files.download("./mathpass_wi_virtualper_influence.csv")

In [None]:
# Reduced sample: influence scores for the Indiana x hybrid coefficient (math proficiency).
# NOTE(review): ckk position 1285 is hard-coded — presumably `in_hybrid`; verify against the design.
in_hybridper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=9, ckk=ckk[1285])
df = pd.DataFrame({'mathpass_in_hybridper_inf': in_hybridper_inf})
df.to_csv('mathpass_in_hybridper_influence.csv')
files.download("./mathpass_in_hybridper_influence.csv")

In [None]:
# Reduced sample: influence scores for the Wisconsin x hybrid coefficient (math proficiency).
# NOTE(review): ckk position 1286 is hard-coded — presumably `wi_hybrid`; verify against the design.
wi_hybridper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=10, ckk=ckk[1286])
df = pd.DataFrame({'mathpass_wi_hybridper_inf': wi_hybridper_inf})
df.to_csv('mathpass_wi_hybridper_influence.csv')
files.download("./mathpass_wi_hybridper_influence.csv")

In [None]:
# Reduced sample: influence scores for the black x virtual coefficient (math proficiency).
black_virtualper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=11, ckk=ckk[7])
df = pd.DataFrame({'mathpass_black_virtualper_inf': black_virtualper_inf})
df.to_csv('mathpass_black_virtualper_influence.csv')
files.download("./mathpass_black_virtualper_influence.csv")

In [None]:
# Reduced sample: influence scores for the black x hybrid coefficient (math proficiency).
black_hybridper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=12, ckk=ckk[8])
df = pd.DataFrame({'mathpass_black_hybridper_inf': black_hybridper_inf})
df.to_csv('mathpass_black_hybridper_influence.csv')
files.download("./mathpass_black_hybridper_influence.csv")

In [None]:
# Reduced sample: influence scores for the hispanic x virtual coefficient (math proficiency).
hispanic_virtualper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=13, ckk=ckk[9])
df = pd.DataFrame({'mathpass_hispanic_virtualper_inf': hispanic_virtualper_inf})
df.to_csv('mathpass_hispanic_virtualper_influence.csv')
files.download("./mathpass_hispanic_virtualper_influence.csv")

In [None]:
# Reduced sample: influence scores for the hispanic x hybrid coefficient (math proficiency).
hispanic_hybridper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=14, ckk=ckk[10])
df = pd.DataFrame({'mathpass_hispanic_hybridper_inf': hispanic_hybridper_inf})
df.to_csv('mathpass_hispanic_hybridper_influence.csv')
files.download("./mathpass_hispanic_hybridper_influence.csv")

In [None]:
# Reduced sample: influence scores for the low-income x virtual coefficient (labelled `ed`), math proficiency.
ed_virtualper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=15, ckk=ckk[11])
df = pd.DataFrame({'mathpass_ed_virtualper_inf': ed_virtualper_inf})
df.to_csv('mathpass_ed_virtualper_influence.csv')
files.download("./mathpass_ed_virtualper_influence.csv")

In [None]:
# Reduced sample: influence scores for the low-income x hybrid coefficient (labelled `ed`), math proficiency.
ed_hybridper_inf = dfbetas(data['mathpass'], exog, data['totaltest'], coef_index=16, ckk=ckk[12])
df = pd.DataFrame({'mathpass_ed_hybridper_inf': ed_hybridper_inf})
df.to_csv('mathpass_ed_hybridper_influence.csv')
files.download("./mathpass_ed_hybridper_influence.csv")

# ELA Proficiency Rate

In [None]:
# District codes to exclude from the ELA-proficiency sample.
drop = pd.read_csv('ela_inf_remove.csv').ela_drop.to_list()

# C_kk on the reduced sample: diagonal of (X'X)^-1 for a design that carries
# explicit mergecode / year dummies next to the regressors.
in_ela = pd.read_csv('indiana_elapass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_ela, il, wi])[['year', 'mergecode', 'state', 'totaltest', 'elapass',
                                    'virtualper', 'hybridper', 'black', 'hispanic',
                                    'white', 'lowincome']]

# Keep only rows whose district is not on the exclusion list, then renumber the rows.
data = data.loc[~data['mergecode'].isin(drop)].reset_index(drop=True)

data['year'] = data['year'].astype(str)

# Demographic x learning-mode interactions (column order matters for ckk positions).
data['black_virtual'] = data['black'].mul(data['virtualper'])
data['black_hybrid'] = data['black'].mul(data['hybridper'])
data['hispanic_virtual'] = data['hispanic'].mul(data['virtualper'])
data['hispanic_hybrid'] = data['hispanic'].mul(data['hybridper'])
data['lowincome_virtual'] = data['lowincome'].mul(data['virtualper'])
data['lowincome_hybrid'] = data['lowincome'].mul(data['hybridper'])

dummy_variables = pd.get_dummies(data[['mergecode', 'year', 'state']], drop_first=True, dtype='int')
data = pd.concat([data, dummy_variables], axis=1).drop(columns=['year', 'mergecode', 'state'])

# State x learning-mode interactions are appended after the dummy columns.
data['in_virtual'] = data['state_indiana'].mul(data['virtualper'])
data['wi_virtual'] = data['state_wisconsin'].mul(data['virtualper'])
data['in_hybrid'] = data['state_indiana'].mul(data['hybridper'])
data['wi_hybrid'] = data['state_wisconsin'].mul(data['hybridper'])

# The state dummies themselves are left out of the design; only their interactions stay.
exog = sm.add_constant(
    data.drop(columns=['elapass', 'totaltest', 'state_indiana', 'state_wisconsin'])
)

XtX_inv = np.linalg.inv(np.dot(exog.T, exog))
ckk = np.diagonal(XtX_inv)

# critical value: 2 / sqrt(number of observations)
critical_val = 2 / np.sqrt(len(data))

In [None]:
# Panel-form data for the ELA-proficiency model on the reduced sample
# (relies on `drop`, the exclusion list loaded in the C_kk cell).
in_ela = pd.read_csv('indiana_elapass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_ela, il, wi])[['year', 'mergecode', 'state', 'totaltest', 'elapass',
                                    'virtualper', 'hybridper', 'black', 'hispanic',
                                    'white', 'lowincome']]

# Keep only rows whose district is not on the exclusion list, then renumber the rows.
data = data.loc[~data['mergecode'].isin(drop)].reset_index(drop=True)

# State dummies are only needed to build the state x learning-mode interactions.
dummy_variables = pd.get_dummies(data[['state']], drop_first=True, dtype='int')
data = pd.concat([data, dummy_variables], axis=1).drop(columns=['state'])
data['in_virtual'] = data['state_indiana'].mul(data['virtualper'])
data['wi_virtual'] = data['state_wisconsin'].mul(data['virtualper'])
data['in_hybrid'] = data['state_indiana'].mul(data['hybridper'])
data['wi_hybrid'] = data['state_wisconsin'].mul(data['hybridper'])

# NOTE(review): '%y' is the two-digit-year directive — assumes `year` is stored that way; confirm.
data['year'] = pd.to_datetime(data['year'], format='%y')
data = (
    data.drop(columns=['state_indiana', 'state_wisconsin'])
        .set_index(['mergecode', 'year'])
)

data['black_virtual'] = data['black'].mul(data['virtualper'])
data['black_hybrid'] = data['black'].mul(data['hybridper'])
data['hispanic_virtual'] = data['hispanic'].mul(data['virtualper'])
data['hispanic_hybrid'] = data['hispanic'].mul(data['hybridper'])
data['lowincome_virtual'] = data['lowincome'].mul(data['virtualper'])
data['lowincome_hybrid'] = data['lowincome'].mul(data['hybridper'])

# Regressors = everything except the weight (totaltest) and the outcome (elapass), plus a constant.
exog = sm.add_constant(data.drop(columns=['totaltest', 'elapass']))

In [None]:
# Reduced sample: influence scores for the main virtual-share coefficient (ELA proficiency).
virtualper_inf = dfbetas(data['elapass'], exog, data['totaltest'], coef_index=1, ckk=ckk[1])
df = pd.DataFrame({'elapass_virtualper_inf': virtualper_inf})
df.to_csv('elapass_virtualper_influence.csv')
files.download("./elapass_virtualper_influence.csv")

In [None]:
# Reduced sample: influence scores for the main hybrid-share coefficient (ELA proficiency).
hybridper_inf = dfbetas(data['elapass'], exog, data['totaltest'], coef_index=2, ckk=ckk[2])
df = pd.DataFrame({'elapass_hybridper_inf': hybridper_inf})
df.to_csv('elapass_hybridper_influence.csv')
files.download("./elapass_hybridper_influence.csv")

In [None]:
# Reduced sample: influence scores for the Indiana x virtual coefficient (ELA proficiency).
# NOTE(review): ckk position 1332 is hard-coded — presumably `in_virtual`; verify against the design.
in_virtualper_inf = dfbetas(data['elapass'], exog, data['totaltest'], coef_index=7, ckk=ckk[1332])
df = pd.DataFrame({'elapass_in_virtualper_inf': in_virtualper_inf})
df.to_csv('elapass_in_virtualper_influence.csv')
files.download("./elapass_in_virtualper_influence.csv")

In [None]:
# Reduced sample: influence scores for the Wisconsin x virtual coefficient (ELA proficiency).
# NOTE(review): ckk position 1333 is hard-coded — presumably `wi_virtual`; verify against the design.
wi_virtualper_inf = dfbetas(data['elapass'], exog, data['totaltest'], coef_index=8, ckk=ckk[1333])
df = pd.DataFrame({'elapass_wi_virtualper_inf': wi_virtualper_inf})
df.to_csv('elapass_wi_virtualper_influence.csv')
files.download("./elapass_wi_virtualper_influence.csv")

In [None]:
# Reduced sample: influence scores for the Indiana x hybrid coefficient (ELA proficiency).
# NOTE(review): ckk position 1334 is hard-coded — presumably `in_hybrid`; verify against the design.
in_hybridper_inf = dfbetas(data['elapass'], exog, data['totaltest'], coef_index=9, ckk=ckk[1334])
df = pd.DataFrame({'elapass_in_hybridper_inf': in_hybridper_inf})
df.to_csv('elapass_in_hybridper_influence.csv')
files.download("./elapass_in_hybridper_influence.csv")

In [None]:
# Reduced sample: influence scores for the Wisconsin x hybrid coefficient (ELA proficiency).
# NOTE(review): ckk position 1335 is hard-coded — presumably `wi_hybrid`; verify against the design.
wi_hybridper_inf = dfbetas(data['elapass'], exog, data['totaltest'], coef_index=10, ckk=ckk[1335])
df = pd.DataFrame({'elapass_wi_hybridper_inf': wi_hybridper_inf})
df.to_csv('elapass_wi_hybridper_influence.csv')
files.download("./elapass_wi_hybridper_influence.csv")

In [None]:
# Reduced sample: influence scores for the black x virtual coefficient (ELA proficiency).
black_virtualper_inf = dfbetas(data['elapass'], exog, data['totaltest'], coef_index=11, ckk=ckk[7])
df = pd.DataFrame({'elapass_black_virtualper_inf': black_virtualper_inf})
df.to_csv('elapass_black_virtualper_influence.csv')
files.download("./elapass_black_virtualper_influence.csv")

In [None]:
# Reduced sample: influence scores for the black x hybrid coefficient (ELA proficiency).
# NOTE(review): the column header below does not follow the 'elapass_<term>_inf' pattern of sibling cells.
black_hybridper_inf = dfbetas(data.elapass, exog, data.totaltest, 12, ckk = ckk[8])
df = pd.DataFrame()
df['black_elapass_hybridper_inf'] = black_hybridper_inf
df.to_csv('elapass_black_hybridper_influence.csv')
# `files` comes from the notebook's import cell; the duplicate mid-notebook import was removed.
files.download(f"./elapass_black_hybridper_influence.csv")

In [None]:
# Leave-one-out influence on coefficient 13 of the data.elapass model
# (weights: data.totaltest), scaled with ckk[9].
# NOTE(review): confirm index 13 and ckk[9] both refer to the hispanic x virtualper term.
hispanic_virtualper_inf = dfbetas(
    data.elapass,
    exog,
    data.totaltest,
    13,
    ckk=ckk[9],
)
df = pd.DataFrame({'elapass_hispanic_virtualper_inf': hispanic_virtualper_inf})
df.to_csv('elapass_hispanic_virtualper_influence.csv')
files.download("./elapass_hispanic_virtualper_influence.csv")

In [None]:
# Leave-one-out influence on coefficient 14 of the data.elapass model
# (weights: data.totaltest), scaled with ckk[10].
# NOTE(review): confirm index 14 and ckk[10] both refer to the hispanic x hybridper term.
hispanic_hybridper_inf = dfbetas(
    data.elapass,
    exog,
    data.totaltest,
    14,
    ckk=ckk[10],
)
df = pd.DataFrame({'hispanic_elapass_hybridper_inf': hispanic_hybridper_inf})
df.to_csv('elapass_hispanic_hybridper_influence.csv')
files.download("./elapass_hispanic_hybridper_influence.csv")

In [None]:
# Leave-one-out influence on coefficient 15 of the data.elapass model
# (weights: data.totaltest), scaled with ckk[11].
# NOTE(review): confirm index 15 and ckk[11] refer to the same interaction term.
ed_virtualper_inf = dfbetas(
    data.elapass,
    exog,
    data.totaltest,
    15,
    ckk=ckk[11],
)
df = pd.DataFrame({'elapass_ed_virtualper_inf': ed_virtualper_inf})
df.to_csv('elapass_ed_virtualper_influence.csv')
files.download("./elapass_ed_virtualper_influence.csv")

In [None]:
# Leave-one-out influence on coefficient 16 of the data.elapass model
# (weights: data.totaltest), scaled with ckk[12].
# NOTE(review): confirm index 16 and ckk[12] refer to the same interaction term.
ed_hybridper_inf = dfbetas(
    data.elapass,
    exog,
    data.totaltest,
    16,
    ckk=ckk[12],
)
df = pd.DataFrame({'ed_elapass_hybridper_inf': ed_hybridper_inf})
df.to_csv('elapass_ed_hybridper_influence.csv')
files.download("./elapass_ed_hybridper_influence.csv")

# Dropout Rate

In [None]:
# mergecode values to exclude, taken from the `drop_drop` column of the CSV.
drop = pd.read_csv('drop_inf_remove.csv').drop_drop.to_list()

# compute ckk
# Read the four state files and stack them, keeping only the columns used below.
az = pd.read_csv('arizona_dropout.csv')
co = pd.read_csv('colorado_dropout.csv')
ga = pd.read_csv('georgia_dropout.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([az, co, ga, wi])[[
    'year', 'mergecode', 'state', 'totalenroll', 'droprate',
    'virtualper', 'hybridper', 'black', 'hispanic',
    'white', 'lowincome',
]]

# Filter out the excluded mergecodes and renumber the rows 0..n-1.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# Cast year to string (presumably so get_dummies encodes it as a category) and
# append the demographic x learning-mode interactions. The column order here
# determines the positions that later cells use to index ckk.
data = data.assign(
    year=data['year'].astype('str'),
    black_virtual=data['black'] * data['virtualper'],
    black_hybrid=data['black'] * data['hybridper'],
    hispanic_virtual=data['hispanic'] * data['virtualper'],
    hispanic_hybrid=data['hispanic'] * data['hybridper'],
    lowincome_virtual=data['lowincome'] * data['virtualper'],
    lowincome_hybrid=data['lowincome'] * data['hybridper'],
)

# Integer dummy columns (first level dropped) replace the three raw key columns.
dummy_variables = pd.get_dummies(
    data[['mergecode', 'year', 'state']],
    drop_first=True,
    dtype='int',
)
data = pd.concat([data, dummy_variables], axis=1).drop(columns=['year', 'mergecode', 'state'])

# State x learning-mode interactions, appended after all dummy columns.
data = data.assign(
    co_virtual=data['state_colorado'] * data['virtualper'],
    ga_virtual=data['state_georgia'] * data['virtualper'],
    wi_virtual=data['state_wisconsin'] * data['virtualper'],
    co_hybrid=data['state_colorado'] * data['hybridper'],
    ga_hybrid=data['state_georgia'] * data['hybridper'],
    wi_hybrid=data['state_wisconsin'] * data['hybridper'],
)

# Design matrix: everything except the outcome, the weight column, and the
# state main-effect dummies, with a constant column added by statsmodels.
exog = sm.add_constant(
    data.drop(columns=['droprate', 'totalenroll', 'state_colorado', 'state_georgia', 'state_wisconsin'])
)

# ckk[k] is the k-th diagonal entry of (X'X)^-1, i.e. it pairs with column k of exog.
XtX_inv = np.linalg.inv(np.dot(exog.T, exog))
ckk = np.diag(XtX_inv)

In [None]:
# dfbetas refits the data.droprate model (weights: data.totalenroll) with each
# observation dropped and tracks coefficient 1; ckk[1] is the (X'X)^-1 diagonal
# entry for exog column 1 (virtualper, the first column after the constant).
virtualper_inf = dfbetas(
    data.droprate,
    exog,
    data.totalenroll,
    1,
    ckk=ckk[1],
)
df = pd.DataFrame({'drop_virtual': virtualper_inf})
df.to_csv('drop_virtual.csv')
files.download("./drop_virtual.csv")

In [None]:
# Leave-one-out influence on coefficient 2 of the data.droprate model
# (weights: data.totalenroll); ckk[2] is the (X'X)^-1 diagonal entry for exog
# column 2 (hybridper).
hybridper_inf = dfbetas(
    data.droprate,
    exog,
    data.totalenroll,
    2,
    ckk=ckk[2],
)
df = pd.DataFrame({'drop_hybrid': hybridper_inf})
df.to_csv('drop_hybrid.csv')
files.download("./drop_hybrid.csv")

In [None]:
# Leave-one-out influence on coefficient 7 of the data.droprate model
# (weights: data.totalenroll).
# NOTE(review): 1308 is a hard-coded position in ckk; confirm it is exog's
# co_virtual column and that coefficient 7 is the same term.
co_virtual_inf = dfbetas(data.droprate, exog, data.totalenroll, 7, ckk = ckk[1308])
df = pd.DataFrame()
# Use the variable assigned above; the previous name `co_virtualper_inf` is
# never defined in this cell and raised NameError on a fresh kernel.
df['drop_co_virtual'] = co_virtual_inf
df.to_csv('drop_co_virtual.csv')
files.download(f"./drop_co_virtual.csv")

In [None]:
# Leave-one-out influence on coefficient 8 of the data.droprate model
# (weights: data.totalenroll).
# NOTE(review): 1309 is a hard-coded position in ckk; confirm it is exog's
# ga_virtual column and that coefficient 8 is the same term.
ga_virtual_inf = dfbetas(
    data.droprate,
    exog,
    data.totalenroll,
    8,
    ckk=ckk[1309],
)
df = pd.DataFrame({'drop_ga_virtual': ga_virtual_inf})
df.to_csv('drop_ga_virtual.csv')
files.download("./drop_ga_virtual.csv")

In [None]:
# Leave-one-out influence on coefficient 9 of the data.droprate model
# (weights: data.totalenroll).
# NOTE(review): 1310 is a hard-coded position in ckk; confirm it is exog's
# wi_virtual column and that coefficient 9 is the same term.
wi_virtual_inf = dfbetas(
    data.droprate,
    exog,
    data.totalenroll,
    9,
    ckk=ckk[1310],
)
df = pd.DataFrame({'drop_wi_virtual': wi_virtual_inf})
df.to_csv('drop_wi_virtual.csv')
files.download("./drop_wi_virtual.csv")

In [None]:
# Leave-one-out influence on coefficient 10 of the data.droprate model
# (weights: data.totalenroll).
# NOTE(review): 1311 is a hard-coded position in ckk; confirm it is exog's
# co_hybrid column and that coefficient 10 is the same term.
co_hybrid_inf = dfbetas(
    data.droprate,
    exog,
    data.totalenroll,
    10,
    ckk=ckk[1311],
)
df = pd.DataFrame({'drop_co_hybrid': co_hybrid_inf})
df.to_csv('drop_co_hybrid.csv')
files.download("./drop_co_hybrid.csv")

In [None]:
# Leave-one-out influence on coefficient 11 of the data.droprate model
# (weights: data.totalenroll).
# NOTE(review): 1312 is a hard-coded position in ckk; confirm it is exog's
# ga_hybrid column and that coefficient 11 is the same term.
ga_hybrid_inf = dfbetas(
    data.droprate,
    exog,
    data.totalenroll,
    11,
    ckk=ckk[1312],
)
df = pd.DataFrame({'drop_ga_hybrid': ga_hybrid_inf})
df.to_csv('drop_ga_hybrid.csv')
files.download("./drop_ga_hybrid.csv")

In [None]:
# Leave-one-out influence on coefficient 12 of the data.droprate model
# (weights: data.totalenroll).
# NOTE(review): 1313 is a hard-coded position in ckk; confirm it is exog's
# wi_hybrid column and that coefficient 12 is the same term.
wi_hybrid_inf = dfbetas(
    data.droprate,
    exog,
    data.totalenroll,
    12,
    ckk=ckk[1313],
)
df = pd.DataFrame({'drop_wi_hybrid': wi_hybrid_inf})
df.to_csv('drop_wi_hybrid.csv')
files.download("./drop_wi_hybrid.csv")

In [None]:
# Leave-one-out influence on coefficient 13 of the data.droprate model
# (weights: data.totalenroll); ckk[7] is the (X'X)^-1 diagonal entry for exog's
# black_virtual column.
# NOTE(review): confirm coefficient 13 in the fitted params is that same term.
black_virtual_inf = dfbetas(
    data.droprate,
    exog,
    data.totalenroll,
    13,
    ckk=ckk[7],
)
df = pd.DataFrame({'drop_black_virtual': black_virtual_inf})
df.to_csv('drop_black_virtual.csv')
files.download("./drop_black_virtual.csv")

In [None]:
# Leave-one-out influence on coefficient 14 of the data.droprate model
# (weights: data.totalenroll); ckk[8] is the (X'X)^-1 diagonal entry for exog's
# black_hybrid column.
# NOTE(review): confirm coefficient 14 in the fitted params is that same term.
black_hybrid_inf = dfbetas(
    data.droprate,
    exog,
    data.totalenroll,
    14,
    ckk=ckk[8],
)
df = pd.DataFrame({'drop_black_hybrid': black_hybrid_inf})
df.to_csv('drop_black_hybrid.csv')
files.download("./drop_black_hybrid.csv")

In [None]:
# Leave-one-out influence on coefficient 15 of the data.droprate model
# (weights: data.totalenroll); ckk[9] is the (X'X)^-1 diagonal entry for exog's
# hispanic_virtual column.
# NOTE(review): confirm coefficient 15 in the fitted params is that same term.
his_virtual_inf = dfbetas(
    data.droprate,
    exog,
    data.totalenroll,
    15,
    ckk=ckk[9],
)
df = pd.DataFrame({'drop_his_virtual': his_virtual_inf})
df.to_csv('drop_his_virtual.csv')
files.download("./drop_his_virtual.csv")

In [None]:
# Leave-one-out influence on coefficient 16 of the data.droprate model
# (weights: data.totalenroll); ckk[10] is the (X'X)^-1 diagonal entry for exog's
# hispanic_hybrid column.
# NOTE(review): confirm coefficient 16 in the fitted params is that same term.
his_hybrid_inf = dfbetas(
    data.droprate,
    exog,
    data.totalenroll,
    16,
    ckk=ckk[10],
)
df = pd.DataFrame({'drop_his_hybrid': his_hybrid_inf})
df.to_csv('drop_his_hybrid.csv')
files.download("./drop_his_hybrid.csv")

In [None]:
# Leave-one-out influence on coefficient 17 of the data.droprate model
# (weights: data.totalenroll); ckk[11] is the (X'X)^-1 diagonal entry for exog's
# lowincome_virtual column.
# NOTE(review): confirm coefficient 17 in the fitted params is that same term.
ed_virtual_inf = dfbetas(
    data.droprate,
    exog,
    data.totalenroll,
    17,
    ckk=ckk[11],
)
df = pd.DataFrame({'drop_ed_virtual': ed_virtual_inf})
df.to_csv('drop_ed_virtual.csv')
files.download("./drop_ed_virtual.csv")

In [None]:
# Leave-one-out influence on coefficient 18 of the data.droprate model
# (weights: data.totalenroll); ckk[12] is the (X'X)^-1 diagonal entry for exog's
# lowincome_hybrid column.
# NOTE(review): confirm coefficient 18 in the fitted params is that same term.
ed_hybrid_inf = dfbetas(
    data.droprate,
    exog,
    data.totalenroll,
    18,
    ckk=ckk[12],
)
df = pd.DataFrame({'drop_ed_hybrid': ed_hybrid_inf})
df.to_csv('drop_ed_hybrid.csv')
files.download("./drop_ed_hybrid.csv")