In [9]:
import pandas as pd
import numpy as np
from scipy.stats import t

# Math Proficiency Rate

In [40]:
# load final dataset
drop = pd.read_csv('math_drop.csv').math_drop.to_list()
in_math = pd.read_csv('indiana_mathpass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_math, il, wi]).loc[:, ['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

# Remove excluded schools; drop=True discards the old index directly instead of
# turning it into an 'index' column that then has to be dropped.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# math change by state
# Aggregate only `totaltest`: summing the whole frame would also try to "sum"
# the non-numeric columns, which is wasted work and pandas-version dependent.
state_year_total = (
    data.groupby(['state', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'state_year_total'})
)
d1 = state_year_total.merge(data, on=['state', 'year'])
# Each row's share of the tested total in its state-year.
d1['weight'] = d1['totaltest'] / d1['state_year_total']

def weight_ci(df, value_col='mathpass'):
    """Weighted mean of ``value_col`` with a two-sided 95% t-interval.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of one group. Must contain ``value_col`` and a ``weight`` column.
    value_col : str, default 'mathpass'
        Column to summarise. The default keeps existing
        ``groupby(...).apply(weight_ci)`` calls working unchanged; pass another
        column name (e.g. via ``apply(weight_ci, value_col=...)``) instead of
        redefining this function per outcome.

    Returns
    -------
    pandas.Series
        ``weighted_mean``, ``lower_bound`` and ``upper_bound``.
    """
    values = df[value_col]
    weights = df['weight']

    weighted_mean = np.average(values, weights=weights)
    # Variance of a weighted mean under a common per-row variance:
    # sigma^2 * sum(w^2) / sum(w)^2.
    # NOTE(review): np.var here is the unweighted variance with numpy's default
    # ddof=0, while the t quantile below uses n-1 degrees of freedom - confirm
    # this pairing is intended.
    weighted_var = np.sum(weights**2 * np.var(values))
    se_weighted_mean = np.sqrt(weighted_var / np.sum(weights)**2)
    # Degrees of freedom are len(df)-1, i.e. 0 for a single-row group.
    t_score = t.ppf(0.975, df=len(df)-1)
    margin_of_error = t_score * se_weighted_mean
    lower_bound = weighted_mean - margin_of_error
    upper_bound = weighted_mean + margin_of_error
    return pd.Series({'weighted_mean': weighted_mean,
                      'lower_bound': lower_bound,
                      'upper_bound': upper_bound})

# One weighted mean and interval per (state, year); the bare expression is the cell output.
(
    d1.groupby(['state', 'year'])
    .apply(weight_ci)
    .reset_index()
)

Unnamed: 0,state,year,weighted_mean,lower_bound,upper_bound
0,il,17,33.929596,32.07373,35.785462
1,il,18,33.039481,31.123991,34.954971
2,il,19,32.356854,30.439845,34.273864
3,il,21,27.165557,25.457633,28.873482
4,indiana,17,36.562422,34.420797,38.704047
5,indiana,18,36.238971,34.056385,38.421557
6,indiana,19,34.781813,32.554639,37.008988
7,indiana,21,35.57823,33.369275,37.787184
8,wisconsin,17,38.919301,36.964467,40.874134
9,wisconsin,18,37.889293,35.976974,39.801611


In [41]:
# math change by race
def weighted_median(data, weights):
    """Return the weighted median of ``data``.

    Observations are sorted and their weights accumulated. The result is the
    first value whose cumulative weight reaches half of the total weight. If
    the cumulative weight lands exactly on the halfway point, the result is the
    mean of that value and the next one, which reduces to the ordinary median
    for equal weights.

    Parameters
    ----------
    data : array-like
        One-dimensional observations.
    weights : array-like
        Weights matched to ``data`` by position; they need not sum to 1.

    Returns
    -------
    scalar
        The weighted median.

    Raises
    ------
    ValueError
        If the inputs are empty, are not one-dimensional with equal length, or
        the weights do not have a positive sum.

    Notes
    -----
    Missing values are not removed here; drop them before calling.
    """
    # Plain arrays make every lookup positional. Indexing a Series with argsort
    # output is label-based and only works when the index is exactly 0..n-1.
    values = np.asarray(data)
    w = np.asarray(weights, dtype=float)
    if values.ndim != 1 or values.shape != w.shape:
        raise ValueError('data and weights must be one-dimensional and of equal length')
    if values.size == 0:
        raise ValueError('cannot compute the weighted median of empty input')

    order = np.argsort(values, kind='stable')
    sorted_data = values[order]
    cumulative_weights = np.cumsum(w[order])
    total_weight = cumulative_weights[-1]
    if not total_weight > 0:
        raise ValueError('weights must have a positive sum')

    half_total = 0.5 * total_weight
    # First position whose cumulative weight is >= half of the total.
    median_index = int(np.searchsorted(cumulative_weights, half_total))
    median_index = min(median_index, sorted_data.size - 1)

    # Only an exact split of the weight calls for averaging two neighbours.
    # The tolerance is tight so ordinary overshoot is not mistaken for a tie.
    has_next = median_index + 1 < sorted_data.size
    if has_next and np.isclose(cumulative_weights[median_index], half_total,
                               rtol=1e-12, atol=0.0):
        return (sorted_data[median_index] + sorted_data[median_index + 1]) / 2
    return sorted_data[median_index]
    
def weighted_median_by_state(state, subject, attribute):
    """List the mergecodes at or below the weighted median of ``attribute``.

    Reads ``{state}_{subject}.csv``, keeps the rows with ``year == 21``, weights
    each row by its share of ``totalenroll``, and returns the unique
    ``mergecode`` values whose ``attribute`` is <= the weighted median.
    """
    column = f'{attribute}'
    frame = pd.read_csv(f'{state}_{subject}.csv')
    frame = frame.loc[frame['year'] == 21].reset_index()
    frame['state_total'] = frame['totalenroll'].sum()
    frame['weight'] = frame['totalenroll'] / frame['state_total']
    cutoff = weighted_median(frame[column], frame['weight'])
    at_or_below = frame[column] <= cutoff
    return list(frame.loc[at_or_below, 'mergecode'].unique())

# Mergecodes at or below each state's weighted median, per attribute.
# Indiana is read from its 'mathpass' file, Illinois and Wisconsin from 'all'.
low_hispanic = []
low_black = []
low_frpm = []
low_mode = []

state = ['indiana']
for i in state:
    low_hispanic.extend(weighted_median_by_state(i, 'mathpass', 'hispanic'))
    low_black.extend(weighted_median_by_state(i, 'mathpass', 'black'))
    low_frpm.extend(weighted_median_by_state(i, 'mathpass', 'lowincome'))
    low_mode.extend(weighted_median_by_state(i, 'mathpass', 'schoolmode'))

state = ['illinois', 'wisconsin']
for i in state:
    low_hispanic.extend(weighted_median_by_state(i, 'all', 'hispanic'))
    low_black.extend(weighted_median_by_state(i, 'all', 'black'))
    low_frpm.extend(weighted_median_by_state(i, 'all', 'lowincome'))
    low_mode.extend(weighted_median_by_state(i, 'all', 'schoolmode'))

In [42]:
drop = pd.read_csv('math_drop.csv').math_drop.to_list()
in_math = pd.read_csv('indiana_mathpass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_math, il, wi]).loc[:, ['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

# Remove excluded schools and renumber rows without keeping the old index.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# Flag is 1 unless the mergecode is in the matching at-or-below-median list.
data['high_black'] = (~data['mergecode'].isin(low_black)).astype('int64')
data['high_his'] = (~data['mergecode'].isin(low_hispanic)).astype('int64')
data['high_frpm'] = (~data['mergecode'].isin(low_frpm)).astype('int64')

# Aggregate only `totaltest` so the non-numeric columns are never summed.
black_year_total = (
    data.groupby(['high_black', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'black_year_total'})
)

d1 = black_year_total.merge(data, on=['high_black', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['black_year_total']

d1.groupby(['high_black', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_black,year,weighted_mean,lower_bound,upper_bound
0,0,17,39.106799,37.842003,40.371595
1,0,18,38.163948,36.825492,39.502404
2,0,19,36.057684,34.724773,37.390596
3,0,21,32.696953,31.395414,33.998491
4,1,17,31.070895,28.647665,33.494125
5,1,18,30.706142,28.288976,33.123307
6,1,19,28.931418,26.532747,31.330088
7,1,21,25.759216,23.491226,28.027206


In [43]:
# Tested totals per (high_his, year). Only `totaltest` is aggregated, so the
# non-numeric columns are never summed.
his_year_total = (
    data.groupby(['high_his', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'his_year_total'})
)

d1 = his_year_total.merge(data, on=['high_his', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['his_year_total']
d1.groupby(['high_his', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_his,year,weighted_mean,lower_bound,upper_bound
0,0,17,38.039798,36.534974,39.544622
1,0,18,37.607312,36.037599,39.177025
2,0,19,35.823543,34.300159,37.346927
3,0,21,32.605591,31.102788,34.108394
4,1,17,32.485147,30.513082,34.457211
5,1,18,31.652635,29.722317,33.582953
6,1,19,29.46716,27.460365,31.473955
7,1,21,26.190457,24.332459,28.048455


In [44]:
# Tested totals per (high_frpm, year). Only `totaltest` is aggregated, so the
# non-numeric columns are never summed.
frpm_year_total = (
    data.groupby(['high_frpm', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'frpm_year_total'})
)

d1 = frpm_year_total.merge(data, on=['high_frpm', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['frpm_year_total']
d1.groupby(['high_frpm', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_frpm,year,weighted_mean,lower_bound,upper_bound
0,0,17,46.517279,44.994451,48.040107
1,0,18,45.147551,43.534333,46.760769
2,0,19,43.176662,41.522643,44.830681
3,0,21,39.526367,37.945265,41.10747
4,1,17,24.987944,23.591744,26.384145
5,1,18,24.452219,23.021392,25.883046
6,1,19,22.482105,21.113368,23.850843
7,1,21,19.038154,17.783637,20.292671


In [45]:
# math change by mode
def weighted_median(data, weights):
    """Return the weighted median of ``data``.

    Observations are sorted and their weights accumulated. The result is the
    first value whose cumulative weight reaches half of the total weight. If
    the cumulative weight lands exactly on the halfway point, the result is the
    mean of that value and the next one, which reduces to the ordinary median
    for equal weights.

    Parameters
    ----------
    data : array-like
        One-dimensional observations.
    weights : array-like
        Weights matched to ``data`` by position; they need not sum to 1.

    Returns
    -------
    scalar
        The weighted median.

    Raises
    ------
    ValueError
        If the inputs are empty, are not one-dimensional with equal length, or
        the weights do not have a positive sum.

    Notes
    -----
    Missing values are not removed here; drop them before calling.
    """
    # Plain arrays make every lookup positional. Indexing a Series with argsort
    # output is label-based and only works when the index is exactly 0..n-1.
    values = np.asarray(data)
    w = np.asarray(weights, dtype=float)
    if values.ndim != 1 or values.shape != w.shape:
        raise ValueError('data and weights must be one-dimensional and of equal length')
    if values.size == 0:
        raise ValueError('cannot compute the weighted median of empty input')

    order = np.argsort(values, kind='stable')
    sorted_data = values[order]
    cumulative_weights = np.cumsum(w[order])
    total_weight = cumulative_weights[-1]
    if not total_weight > 0:
        raise ValueError('weights must have a positive sum')

    half_total = 0.5 * total_weight
    # First position whose cumulative weight is >= half of the total.
    median_index = int(np.searchsorted(cumulative_weights, half_total))
    median_index = min(median_index, sorted_data.size - 1)

    # Only an exact split of the weight calls for averaging two neighbours.
    # The tolerance is tight so ordinary overshoot is not mistaken for a tie.
    has_next = median_index + 1 < sorted_data.size
    if has_next and np.isclose(cumulative_weights[median_index], half_total,
                               rtol=1e-12, atol=0.0):
        return (sorted_data[median_index] + sorted_data[median_index + 1]) / 2
    return sorted_data[median_index]
    
def weighted_median_by_state(state, subject, attribute):
    """List the mergecodes at or below the weighted median of ``attribute``.

    Reads ``{state}_{subject}.csv``, keeps the rows with ``year == 21``, weights
    each row by its share of ``totalenroll``, and returns the unique
    ``mergecode`` values whose ``attribute`` is <= the weighted median.
    """
    column = f'{attribute}'
    frame = pd.read_csv(f'{state}_{subject}.csv')
    frame = frame.loc[frame['year'] == 21].reset_index()
    frame['state_total'] = frame['totalenroll'].sum()
    frame['weight'] = frame['totalenroll'] / frame['state_total']
    cutoff = weighted_median(frame[column], frame['weight'])
    at_or_below = frame[column] <= cutoff
    return list(frame.loc[at_or_below, 'mergecode'].unique())

# Mergecodes at or below each state's weighted median of virtual / hybrid share.
# Indiana is read from its 'mathpass' file, Illinois and Wisconsin from 'all'.
low_vir = []
low_hy = []

state = ['indiana']
for i in state:
    low_vir.extend(weighted_median_by_state(i, 'mathpass', 'virtualper'))
    low_hy.extend(weighted_median_by_state(i, 'mathpass', 'hybridper'))

state = ['illinois', 'wisconsin']
for i in state:
    low_vir.extend(weighted_median_by_state(i, 'all', 'virtualper'))
    low_hy.extend(weighted_median_by_state(i, 'all', 'hybridper'))

In [46]:
drop = pd.read_csv('math_drop.csv').math_drop.to_list()
in_math = pd.read_csv('indiana_mathpass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_math, il, wi]).loc[:, ['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

# Remove excluded schools and renumber rows without keeping the old index.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# Flag is 1 unless the mergecode is in the matching at-or-below-median list.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Aggregate only `totaltest` so the non-numeric columns are never summed.
vir_year_total = (
    data.groupby(['high_vir', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'vir_year_total'})
)

d1 = vir_year_total.merge(data, on=['high_vir', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['vir_year_total']
d1.groupby(['high_vir', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_vir,year,weighted_mean,lower_bound,upper_bound
0,0,17,37.741679,36.387053,39.096305
1,0,18,37.031367,35.605614,38.45712
2,0,19,34.928117,33.513543,36.342691
3,0,21,32.002011,30.606167,33.397856
4,1,17,32.553359,30.222828,34.883891
5,1,18,31.93891,29.617474,34.260346
6,1,19,30.119787,27.83233,32.407244
7,1,21,26.461031,24.334504,28.587558


In [47]:
# Tested totals per (high_hy, year). Only `totaltest` is aggregated, so the
# non-numeric columns are never summed.
hy_year_total = (
    data.groupby(['high_hy', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'hy_year_total'})
)

d1 = hy_year_total.merge(data, on=['high_hy', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['hy_year_total']
d1.groupby(['high_hy', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_hy,year,weighted_mean,lower_bound,upper_bound
0,0,17,33.756063,32.184082,35.328044
1,0,18,33.23649,31.638241,34.834739
2,0,19,31.284271,29.699627,32.868914
3,0,21,27.902794,26.378441,29.427147
4,1,17,38.001468,36.093111,39.909825
5,1,18,37.038423,35.055307,39.021538
6,1,19,35.009947,33.050118,36.969776
7,1,21,31.958791,30.082753,33.834829


In [48]:
# indiana math change by mode
data = pd.read_csv('indiana_mathpass.csv').loc[:, ['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

# Remove excluded schools and renumber rows without keeping the old index.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# Flag is 1 unless the mergecode is in the matching at-or-below-median list.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Aggregate only `totaltest` so the non-numeric columns are never summed.
vir_year_total = (
    data.groupby(['high_vir', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'vir_year_total'})
)

d1 = vir_year_total.merge(data, on=['high_vir', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['vir_year_total']
d1.groupby(['high_vir', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_vir,year,weighted_mean,lower_bound,upper_bound
0,0,17,37.517232,35.019482,40.014981
1,0,18,37.605968,35.06428,40.147656
2,0,19,36.536805,33.990203,39.083407
3,0,21,37.201287,34.589897,39.812677
4,1,17,35.172566,31.369934,38.975199
5,1,18,34.263533,30.348689,38.178376
6,1,19,32.294773,28.213453,36.376093
7,1,21,33.225639,29.442166,37.009112


In [49]:
# Tested totals per (high_hy, year). Only `totaltest` is aggregated, so the
# non-numeric columns are never summed.
hy_year_total = (
    data.groupby(['high_hy', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'hy_year_total'})
)

d1 = hy_year_total.merge(data, on=['high_hy', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['hy_year_total']
d1.groupby(['high_hy', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_hy,year,weighted_mean,lower_bound,upper_bound
0,0,17,35.899787,33.108927,38.690646
1,0,18,35.885019,33.011619,38.75842
2,0,19,34.530079,31.575234,37.484923
3,0,21,34.511131,31.619245,37.403017
4,1,17,37.40199,34.109748,40.694232
5,1,18,36.68329,33.382512,39.984067
6,1,19,35.095554,31.767894,38.423214
7,1,21,36.902424,33.537613,40.267234


In [50]:
# illinois math change by mode
data = pd.read_csv('illinois_all.csv').loc[:, ['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

# Remove excluded schools and renumber rows without keeping the old index.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# Flag is 1 unless the mergecode is in the matching at-or-below-median list.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Aggregate only `totaltest` so the non-numeric columns are never summed.
vir_year_total = (
    data.groupby(['high_vir', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'vir_year_total'})
)

d1 = vir_year_total.merge(data, on=['high_vir', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['vir_year_total']
d1.groupby(['high_vir', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_vir,year,weighted_mean,lower_bound,upper_bound
0,0,17,36.738015,34.744099,38.731931
1,0,18,35.530289,33.338293,37.722286
2,0,19,34.550107,32.409165,36.691049
3,0,21,29.507854,27.58539,31.430317
4,1,17,30.368252,26.915123,33.821381
5,1,18,29.858807,26.476916,33.240697
6,1,19,29.526386,25.963505,33.089266
7,1,21,24.07872,20.984583,27.172856


In [51]:
# Tested totals per (high_hy, year). Only `totaltest` is aggregated, so the
# non-numeric columns are never summed.
hy_year_total = (
    data.groupby(['high_hy', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'hy_year_total'})
)

d1 = hy_year_total.merge(data, on=['high_hy', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['hy_year_total']
d1.groupby(['high_hy', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_hy,year,weighted_mean,lower_bound,upper_bound
0,0,17,30.433477,28.145342,32.721613
1,0,18,29.416722,27.104728,31.728715
2,0,19,28.669454,26.325011,31.013897
3,0,21,23.533554,21.490674,25.576434
4,1,17,39.050202,36.018756,42.081647
5,1,18,37.981996,34.748722,41.215269
6,1,19,37.345346,34.215645,40.475048
7,1,21,31.913851,29.064666,34.763036


In [52]:
# wisconsin math change by mode
data = pd.read_csv('wisconsin_all.csv').loc[:, ['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

# Remove excluded schools and renumber rows without keeping the old index.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# Flag is 1 unless the mergecode is in the matching at-or-below-median list.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Aggregate only `totaltest` so the non-numeric columns are never summed.
vir_year_total = (
    data.groupby(['high_vir', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'vir_year_total'})
)

d1 = vir_year_total.merge(data, on=['high_vir', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['vir_year_total']
d1.groupby(['high_vir', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_vir,year,weighted_mean,lower_bound,upper_bound
0,0,17,41.201555,38.790941,43.61217
1,0,18,40.344758,38.089616,42.599899
2,0,19,33.674928,31.338308,36.011548
3,0,21,31.465378,29.186808,33.743947
4,1,17,35.965275,32.675572,39.254979
5,1,18,34.706933,31.289621,38.124246
6,1,19,28.933871,25.684511,32.18323
7,1,21,24.339883,21.314881,27.364885


In [53]:
# Tested totals per (high_hy, year). Only `totaltest` is aggregated, so the
# non-numeric columns are never summed.
hy_year_total = (
    data.groupby(['high_hy', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'hy_year_total'})
)

d1 = hy_year_total.merge(data, on=['high_hy', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['hy_year_total']
d1.groupby(['high_hy', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_hy,year,weighted_mean,lower_bound,upper_bound
0,0,17,41.061127,38.53214,43.590113
1,0,18,39.886113,37.434378,42.337848
2,0,19,34.244815,31.780471,36.709159
3,0,21,30.776856,28.320062,33.233651
4,1,17,35.542946,32.4604,38.625492
5,1,18,34.740864,31.683543,37.798184
6,1,19,27.435456,24.464574,30.406339
7,1,21,24.547764,21.85959,27.235938


# ELA Proficiency Rate

In [54]:
# load final dataset
drop = pd.read_csv('ela_inf_remove.csv').ela_drop.to_list()
in_ela = pd.read_csv('indiana_elapass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_ela, il, wi]).loc[:, ['year', 'mergecode', 'state', 'totaltest', 'elapass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

# Remove excluded schools; drop=True discards the old index directly instead of
# turning it into an 'index' column that then has to be dropped.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# ela change by state
# Aggregate only `totaltest`: summing the whole frame would also try to "sum"
# the non-numeric columns, which is wasted work and pandas-version dependent.
state_year_total = (
    data.groupby(['state', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'state_year_total'})
)
d1 = state_year_total.merge(data, on=['state', 'year'])
# Each row's share of the tested total in its state-year.
d1['weight'] = d1['totaltest'] / d1['state_year_total']

def weight_ci(df, value_col='elapass'):
    """Weighted mean of ``value_col`` with a two-sided 95% t-interval.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of one group. Must contain ``value_col`` and a ``weight`` column.
    value_col : str, default 'elapass'
        Column to summarise. The default keeps existing
        ``groupby(...).apply(weight_ci)`` calls working unchanged; pass another
        column name (e.g. via ``apply(weight_ci, value_col=...)``) instead of
        redefining this function per outcome.

    Returns
    -------
    pandas.Series
        ``weighted_mean``, ``lower_bound`` and ``upper_bound``.
    """
    values = df[value_col]
    weights = df['weight']

    weighted_mean = np.average(values, weights=weights)
    # Variance of a weighted mean under a common per-row variance:
    # sigma^2 * sum(w^2) / sum(w)^2.
    # NOTE(review): np.var here is the unweighted variance with numpy's default
    # ddof=0, while the t quantile below uses n-1 degrees of freedom - confirm
    # this pairing is intended.
    weighted_var = np.sum(weights**2 * np.var(values))
    se_weighted_mean = np.sqrt(weighted_var / np.sum(weights)**2)
    # Degrees of freedom are len(df)-1, i.e. 0 for a single-row group.
    t_score = t.ppf(0.975, df=len(df)-1)
    margin_of_error = t_score * se_weighted_mean
    lower_bound = weighted_mean - margin_of_error
    upper_bound = weighted_mean + margin_of_error
    return pd.Series({'weighted_mean': weighted_mean,
                      'lower_bound': lower_bound,
                      'upper_bound': upper_bound})

# One weighted mean and interval per (state, year); the bare expression is the cell output.
(
    d1.groupby(['state', 'year'])
    .apply(weight_ci)
    .reset_index()
)

Unnamed: 0,state,year,weighted_mean,lower_bound,upper_bound
0,il,17,39.168503,37.298857,41.038148
1,il,18,37.784344,35.858857,39.70983
2,il,19,36.553029,34.683672,38.422387
3,il,21,32.902865,31.094801,34.71093
4,indiana,17,61.882724,59.858911,63.906536
5,indiana,18,60.266718,58.25752,62.275917
6,indiana,19,63.367735,61.203146,65.532324
7,indiana,21,65.947107,63.831782,68.062432
8,wisconsin,17,41.550971,39.504269,43.597673
9,wisconsin,18,39.623002,37.644878,41.601126


In [55]:
# ela change by race
def weighted_median(data, weights):
    """Return the weighted median of ``data``.

    Observations are sorted and their weights accumulated. The result is the
    first value whose cumulative weight reaches half of the total weight. If
    the cumulative weight lands exactly on the halfway point, the result is the
    mean of that value and the next one, which reduces to the ordinary median
    for equal weights.

    Parameters
    ----------
    data : array-like
        One-dimensional observations.
    weights : array-like
        Weights matched to ``data`` by position; they need not sum to 1.

    Returns
    -------
    scalar
        The weighted median.

    Raises
    ------
    ValueError
        If the inputs are empty, are not one-dimensional with equal length, or
        the weights do not have a positive sum.

    Notes
    -----
    Missing values are not removed here; drop them before calling.
    """
    # Plain arrays make every lookup positional. Indexing a Series with argsort
    # output is label-based and only works when the index is exactly 0..n-1.
    values = np.asarray(data)
    w = np.asarray(weights, dtype=float)
    if values.ndim != 1 or values.shape != w.shape:
        raise ValueError('data and weights must be one-dimensional and of equal length')
    if values.size == 0:
        raise ValueError('cannot compute the weighted median of empty input')

    order = np.argsort(values, kind='stable')
    sorted_data = values[order]
    cumulative_weights = np.cumsum(w[order])
    total_weight = cumulative_weights[-1]
    if not total_weight > 0:
        raise ValueError('weights must have a positive sum')

    half_total = 0.5 * total_weight
    # First position whose cumulative weight is >= half of the total.
    median_index = int(np.searchsorted(cumulative_weights, half_total))
    median_index = min(median_index, sorted_data.size - 1)

    # Only an exact split of the weight calls for averaging two neighbours.
    # The tolerance is tight so ordinary overshoot is not mistaken for a tie.
    has_next = median_index + 1 < sorted_data.size
    if has_next and np.isclose(cumulative_weights[median_index], half_total,
                               rtol=1e-12, atol=0.0):
        return (sorted_data[median_index] + sorted_data[median_index + 1]) / 2
    return sorted_data[median_index]
    
def weighted_median_by_state(state, subject, attribute):
    """List the mergecodes at or below the weighted median of ``attribute``.

    Reads ``{state}_{subject}.csv``, keeps the rows with ``year == 21``, weights
    each row by its share of ``totalenroll``, and returns the unique
    ``mergecode`` values whose ``attribute`` is <= the weighted median.
    """
    column = f'{attribute}'
    frame = pd.read_csv(f'{state}_{subject}.csv')
    frame = frame.loc[frame['year'] == 21].reset_index()
    frame['state_total'] = frame['totalenroll'].sum()
    frame['weight'] = frame['totalenroll'] / frame['state_total']
    cutoff = weighted_median(frame[column], frame['weight'])
    at_or_below = frame[column] <= cutoff
    return list(frame.loc[at_or_below, 'mergecode'].unique())

# Mergecodes at or below each state's weighted median, per attribute.
# Indiana is read from its 'elapass' file, Illinois and Wisconsin from 'all'.
low_hispanic = []
low_black = []
low_frpm = []
low_mode = []

state = ['indiana']
for i in state:
    low_hispanic.extend(weighted_median_by_state(i, 'elapass', 'hispanic'))
    low_black.extend(weighted_median_by_state(i, 'elapass', 'black'))
    low_frpm.extend(weighted_median_by_state(i, 'elapass', 'lowincome'))
    low_mode.extend(weighted_median_by_state(i, 'elapass', 'schoolmode'))

state = ['illinois', 'wisconsin']
for i in state:
    low_hispanic.extend(weighted_median_by_state(i, 'all', 'hispanic'))
    low_black.extend(weighted_median_by_state(i, 'all', 'black'))
    low_frpm.extend(weighted_median_by_state(i, 'all', 'lowincome'))
    low_mode.extend(weighted_median_by_state(i, 'all', 'schoolmode'))

In [57]:
drop = pd.read_csv('ela_inf_remove.csv').ela_drop.to_list()
in_ela = pd.read_csv('indiana_elapass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_ela, il, wi]).loc[:, ['year', 'mergecode', 'state', 'totaltest', 'elapass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

# Remove excluded schools and renumber rows without keeping the old index.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# Flag is 1 unless the mergecode is in the matching at-or-below-median list.
data['high_black'] = (~data['mergecode'].isin(low_black)).astype('int64')
data['high_his'] = (~data['mergecode'].isin(low_hispanic)).astype('int64')
data['high_frpm'] = (~data['mergecode'].isin(low_frpm)).astype('int64')

# Aggregate only `totaltest` so the non-numeric columns are never summed.
black_year_total = (
    data.groupby(['high_black', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'black_year_total'})
)

d1 = black_year_total.merge(data, on=['high_black', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['black_year_total']

d1.groupby(['high_black', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_black,year,weighted_mean,lower_bound,upper_bound
0,0,17,48.786408,47.211584,50.361232
1,0,18,47.451665,45.860961,49.042369
2,0,19,47.244486,45.549699,48.939273
3,0,21,44.53396,42.597955,46.469965
4,1,17,41.098685,38.272649,43.924721
5,1,18,39.887451,37.043142,42.731761
6,1,19,39.832075,36.858042,42.806107
7,1,21,38.160725,35.0802,41.24125


In [59]:
# Tested totals per (high_his, year). Only `totaltest` is aggregated, so the
# non-numeric columns are never summed.
his_year_total = (
    data.groupby(['high_his', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'his_year_total'})
)

d1 = his_year_total.merge(data, on=['high_his', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['his_year_total']

d1.groupby(['high_his', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_his,year,weighted_mean,lower_bound,upper_bound
0,0,17,48.457933,46.639302,50.276563
1,0,18,47.928651,46.110148,49.747153
2,0,19,47.710033,45.787655,49.63241
3,0,21,45.257201,43.120179,47.394223
4,1,17,41.602131,39.214763,43.9895
5,1,18,39.577492,37.149817,42.005167
6,1,19,39.522032,36.952512,42.091552
7,1,21,37.590112,34.892207,40.288017


In [60]:
# Tested totals per (high_frpm, year). Only `totaltest` is aggregated, so the
# non-numeric columns are never summed.
frpm_year_total = (
    data.groupby(['high_frpm', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'frpm_year_total'})
)

d1 = frpm_year_total.merge(data, on=['high_frpm', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['frpm_year_total']

d1.groupby(['high_frpm', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_frpm,year,weighted_mean,lower_bound,upper_bound
0,0,17,56.582213,54.769025,58.395401
1,0,18,54.786588,52.969996,56.603181
2,0,19,54.611098,52.655883,56.566314
3,0,21,52.122582,49.904463,54.340701
4,1,17,33.251957,31.465671,35.038243
5,1,18,31.923346,30.120135,33.726556
6,1,19,31.827426,29.912247,33.742606
7,1,21,29.223982,27.186158,31.261807


In [54]:
# ela change by mode
def weighted_median(data, weights):
    """Return the weighted median of ``data``.

    Observations are sorted and their weights accumulated. The result is the
    first value whose cumulative weight reaches half of the total weight. If
    the cumulative weight lands exactly on the halfway point, the result is the
    mean of that value and the next one, which reduces to the ordinary median
    for equal weights.

    Parameters
    ----------
    data : array-like
        One-dimensional observations.
    weights : array-like
        Weights matched to ``data`` by position; they need not sum to 1.

    Returns
    -------
    scalar
        The weighted median.

    Raises
    ------
    ValueError
        If the inputs are empty, are not one-dimensional with equal length, or
        the weights do not have a positive sum.

    Notes
    -----
    Missing values are not removed here; drop them before calling.
    """
    # Plain arrays make every lookup positional. Indexing a Series with argsort
    # output is label-based and only works when the index is exactly 0..n-1.
    values = np.asarray(data)
    w = np.asarray(weights, dtype=float)
    if values.ndim != 1 or values.shape != w.shape:
        raise ValueError('data and weights must be one-dimensional and of equal length')
    if values.size == 0:
        raise ValueError('cannot compute the weighted median of empty input')

    order = np.argsort(values, kind='stable')
    sorted_data = values[order]
    cumulative_weights = np.cumsum(w[order])
    total_weight = cumulative_weights[-1]
    if not total_weight > 0:
        raise ValueError('weights must have a positive sum')

    half_total = 0.5 * total_weight
    # First position whose cumulative weight is >= half of the total.
    median_index = int(np.searchsorted(cumulative_weights, half_total))
    median_index = min(median_index, sorted_data.size - 1)

    # Only an exact split of the weight calls for averaging two neighbours.
    # The tolerance is tight so ordinary overshoot is not mistaken for a tie.
    has_next = median_index + 1 < sorted_data.size
    if has_next and np.isclose(cumulative_weights[median_index], half_total,
                               rtol=1e-12, atol=0.0):
        return (sorted_data[median_index] + sorted_data[median_index + 1]) / 2
    return sorted_data[median_index]
    
def weighted_median_by_state(state, subject, attribute):
    """List the mergecodes at or below the weighted median of ``attribute``.

    Reads ``{state}_{subject}.csv``, keeps the rows with ``year == 21``, weights
    each row by its share of ``totalenroll``, and returns the unique
    ``mergecode`` values whose ``attribute`` is <= the weighted median.
    """
    column = f'{attribute}'
    frame = pd.read_csv(f'{state}_{subject}.csv')
    frame = frame.loc[frame['year'] == 21].reset_index()
    frame['state_total'] = frame['totalenroll'].sum()
    frame['weight'] = frame['totalenroll'] / frame['state_total']
    cutoff = weighted_median(frame[column], frame['weight'])
    at_or_below = frame[column] <= cutoff
    return list(frame.loc[at_or_below, 'mergecode'].unique())

# Mergecodes at or below each state's weighted median of virtual / hybrid share.
# Indiana is read from its 'elapass' file, Illinois and Wisconsin from 'all'.
low_vir = []
low_hy = []

state = ['indiana']
for i in state:
    low_vir.extend(weighted_median_by_state(i, 'elapass', 'virtualper'))
    low_hy.extend(weighted_median_by_state(i, 'elapass', 'hybridper'))

state = ['illinois', 'wisconsin']
for i in state:
    low_vir.extend(weighted_median_by_state(i, 'all', 'virtualper'))
    low_hy.extend(weighted_median_by_state(i, 'all', 'hybridper'))

In [61]:
drop = pd.read_csv('ela_inf_remove.csv').ela_drop.to_list()
in_ela = pd.read_csv('indiana_elapass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_ela, il, wi]).loc[:, ['year', 'mergecode', 'state', 'totaltest', 'elapass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

# Remove excluded schools and renumber rows without keeping the old index.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# Flag is 1 unless the mergecode is in the matching at-or-below-median list.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Aggregate only `totaltest` so the non-numeric columns are never summed.
vir_year_total = (
    data.groupby(['high_vir', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'vir_year_total'})
)

d1 = vir_year_total.merge(data, on=['high_vir', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['vir_year_total']
d1.groupby(['high_vir', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_vir,year,weighted_mean,lower_bound,upper_bound
0,0,17,47.897432,46.281564,49.5133
1,0,18,46.943253,45.316544,48.569963
2,0,19,46.490142,44.737211,48.243072
3,0,21,44.132689,42.154148,46.111231
4,1,17,42.026372,39.231835,44.820909
5,1,18,40.335929,37.528639,43.14322
6,1,19,40.559537,37.657205,43.461868
7,1,21,38.533343,35.481589,41.585096


In [62]:
# Tested totals per (high_hy, year). Only `totaltest` is aggregated, so the
# non-numeric columns are never summed.
hy_year_total = (
    data.groupby(['high_hy', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'hy_year_total'})
)

d1 = hy_year_total.merge(data, on=['high_hy', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['hy_year_total']
d1.groupby(['high_hy', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_hy,year,weighted_mean,lower_bound,upper_bound
0,0,17,42.306097,40.424479,44.187716
1,0,18,41.008482,39.110646,42.906318
2,0,19,41.171404,39.165124,43.177684
3,0,21,38.890191,36.722814,41.057569
4,1,17,49.378478,47.128515,51.628442
5,1,18,48.010014,45.758923,50.261105
6,1,19,47.404931,45.002239,49.807622
7,1,21,45.250367,42.633215,47.867518


In [63]:
# indiana ela change by mode
data = pd.read_csv('indiana_elapass.csv').loc[:, ['year', 'mergecode', 'state', 'totaltest', 'elapass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

# Remove excluded schools and renumber rows without keeping the old index.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# Flag is 1 unless the mergecode is in the matching at-or-below-median list.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Aggregate only `totaltest` so the non-numeric columns are never summed.
vir_year_total = (
    data.groupby(['high_vir', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'vir_year_total'})
)

d1 = vir_year_total.merge(data, on=['high_vir', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['vir_year_total']
d1.groupby(['high_vir', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_vir,year,weighted_mean,lower_bound,upper_bound
0,0,17,61.573303,59.233662,63.912943
1,0,18,60.197575,57.919549,62.475601
2,0,19,63.781223,61.339941,66.222505
3,0,21,65.689084,63.144289,68.233879
4,1,17,62.288889,58.639918,65.93786
5,1,18,60.358607,56.595045,64.12217
6,1,19,62.832227,58.820658,66.843797
7,1,21,66.283734,62.717266,69.850201


In [64]:
# Tested totals per (high_hy, year). Only `totaltest` is aggregated, so the
# non-numeric columns are never summed.
hy_year_total = (
    data.groupby(['high_hy', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'hy_year_total'})
)

d1 = hy_year_total.merge(data, on=['high_hy', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['hy_year_total']
d1.groupby(['high_hy', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_hy,year,weighted_mean,lower_bound,upper_bound
0,0,17,60.692136,57.936412,63.447861
1,0,18,58.936086,56.16805,61.704122
2,0,19,62.430352,59.424735,65.435969
3,0,21,64.564292,61.705923,67.42266
4,1,17,63.347001,60.49221,66.201792
5,1,18,61.90685,59.183088,64.630611
6,1,19,64.515543,61.574562,67.456524
7,1,21,67.622659,64.604378,70.64094


In [65]:
# illinois ela change by mode
data = pd.read_csv('illinois_all.csv').loc[:, ['year', 'mergecode', 'state', 'totaltest', 'elapass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

# Remove excluded schools and renumber rows without keeping the old index.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# Flag is 1 unless the mergecode is in the matching at-or-below-median list.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Aggregate only `totaltest` so the non-numeric columns are never summed.
vir_year_total = (
    data.groupby(['high_vir', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'vir_year_total'})
)

d1 = vir_year_total.merge(data, on=['high_vir', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['vir_year_total']
d1.groupby(['high_vir', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_vir,year,weighted_mean,lower_bound,upper_bound
0,0,17,42.825109,40.9015,44.748717
1,0,18,41.973031,39.950745,43.995317
2,0,19,40.053896,38.165776,41.942017
3,0,21,36.144791,34.203518,38.086064
4,1,17,34.596474,30.968321,38.224628
5,1,18,32.58368,28.988204,36.179157
6,1,19,32.178865,28.564333,35.793398
7,1,21,28.751514,25.33479,32.168238


In [66]:
# Tested totals per (high_hy, year). Only `totaltest` is aggregated, so the
# non-numeric columns are never summed.
hy_year_total = (
    data.groupby(['high_hy', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'hy_year_total'})
)

d1 = hy_year_total.merge(data, on=['high_hy', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['hy_year_total']
d1.groupby(['high_hy', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_hy,year,weighted_mean,lower_bound,upper_bound
0,0,17,34.266266,31.929886,36.602647
1,0,18,32.571247,30.144079,34.998415
2,0,19,31.792786,29.433831,34.151741
3,0,21,28.046518,25.811544,30.281493
4,1,17,45.759999,42.843632,48.676365
5,1,18,44.408269,41.485333,47.331205
6,1,19,42.570903,39.712088,45.429718
7,1,21,38.811504,36.022689,41.600319


In [67]:
# wisconsin ela change by mode
data = pd.read_csv('wisconsin_all.csv').loc[:, ['year', 'mergecode', 'state', 'totaltest', 'elapass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

# Remove excluded schools and renumber rows without keeping the old index.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# Flag is 1 unless the mergecode is in the matching at-or-below-median list.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Aggregate only `totaltest` so the non-numeric columns are never summed.
vir_year_total = (
    data.groupby(['high_vir', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'vir_year_total'})
)

d1 = vir_year_total.merge(data, on=['high_vir', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['vir_year_total']
d1.groupby(['high_vir', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_vir,year,weighted_mean,lower_bound,upper_bound
0,0,17,44.375091,41.931685,46.818498
1,0,18,42.359721,40.005551,44.713891
2,0,19,41.691086,39.332368,44.049804
3,0,21,38.008064,35.575815,40.440313
4,1,17,37.983153,34.380826,41.58548
5,1,18,36.137841,32.648193,39.627489
6,1,19,35.832592,32.293245,39.371938
7,1,21,30.074815,26.448028,33.701601


In [68]:
# Tested totals per (high_hy, year). Only `totaltest` is aggregated, so the
# non-numeric columns are never summed.
hy_year_total = (
    data.groupby(['high_hy', 'year'])['totaltest']
    .sum()
    .reset_index()
    .rename(columns={'totaltest': 'hy_year_total'})
)

d1 = hy_year_total.merge(data, on=['high_hy', 'year'])
# Each row's share of the tested total in its group-year.
d1['weight'] = d1['totaltest'] / d1['hy_year_total']
d1.groupby(['high_hy', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_hy,year,weighted_mean,lower_bound,upper_bound
0,0,17,43.540289,40.897501,46.183077
1,0,18,41.467197,38.967733,43.966661
2,0,19,41.151356,38.72048,43.582233
3,0,21,36.559671,33.863664,39.255679
4,1,17,38.309908,35.059586,41.56023
5,1,18,36.606121,33.338492,39.87375
6,1,19,35.726356,32.268974,39.183738
7,1,21,31.005815,27.879851,34.131779


# Dropout Rate

In [86]:
# load final dataset (include illinois for demonstration purposes)
# Exclusion list = mergecodes from drop_inf_remove.csv plus the hand-listed ones below.
drop = pd.read_csv('drop_inf_remove.csv').drop_drop.to_list() + [
    '644-0500georgia', '4412-90822arizona', '4403-6264arizona',
       '4320-90159arizona', '4221-90064arizona', '4196-92913arizona',
       '79874-78813arizona', '1010-5146colorado', '0870-2155colorado',
       '772-0110georgia', '4431-5877arizona']
az = pd.read_csv('arizona_dropout.csv')
co = pd.read_csv('colorado_dropout.csv')
ga = pd.read_csv('georgia_dropout.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([az, co, ga, il, wi]).loc[:, ['year', 'mergecode', 'state', 'totalenroll', 'droprate',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

# Remove excluded schools; drop=True discards the old index directly instead of
# turning it into an 'index' column that then has to be dropped.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# dropout change by state
# Aggregate only `totalenroll`: summing the whole frame would also try to "sum"
# the non-numeric columns, which is wasted work and pandas-version dependent.
state_year_total = (
    data.groupby(['state', 'year'])['totalenroll']
    .sum()
    .reset_index()
    .rename(columns={'totalenroll': 'state_year_total'})
)
d1 = state_year_total.merge(data, on=['state', 'year'])
# Each row's share of total enrollment in its state-year.
d1['weight'] = d1['totalenroll'] / d1['state_year_total']

def weight_ci(df, value_col='droprate', weight_col='weight', confidence=0.95):
    """Weighted mean of one column with a two-sided t confidence interval.

    Designed to be passed to ``DataFrame.groupby(...).apply``; with the
    default arguments it reproduces the original dropout-rate computation.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of one group; must contain ``value_col`` and ``weight_col``.
    value_col : str, default 'droprate'
        Column holding the quantity to average (e.g. 'mathpass' for the
        proficiency analysis).
    weight_col : str, default 'weight'
        Column holding each row's weight.
    confidence : float, default 0.95
        Two-sided confidence level of the interval.

    Returns
    -------
    pandas.Series
        ``weighted_mean``, ``lower_bound`` and ``upper_bound``. The bounds are
        NaN for a single-row group, which has zero degrees of freedom.
    """
    values = df[value_col]
    weights = df[weight_col]

    weighted_mean = np.average(values, weights=weights)

    # Variance of the weighted mean assuming every row shares the same
    # (unweighted, population) variance: sum(w_i^2 * var) / (sum w_i)^2.
    common_var = np.var(values)
    weighted_var = np.sum(weights**2 * common_var)
    se_weighted_mean = np.sqrt(weighted_var / np.sum(weights)**2)

    # Upper-tail quantile for a two-sided interval: 0.975 when confidence is 0.95.
    t_score = t.ppf(0.5 + confidence / 2, df=len(df) - 1)
    margin_of_error = t_score * se_weighted_mean

    return pd.Series({'weighted_mean': weighted_mean,
                      'lower_bound': weighted_mean - margin_of_error,
                      'upper_bound': weighted_mean + margin_of_error})

# One row per (state, year): weighted mean plus the bounds returned by weight_ci.
(
    d1.groupby(['state', 'year'])
      .apply(weight_ci)
      .reset_index()
)

Unnamed: 0,state,year,weighted_mean,lower_bound,upper_bound
0,arizona,17,2.388152,1.296878,3.479427
1,arizona,18,2.381445,1.256243,3.506647
2,arizona,19,2.530822,1.511723,3.549921
3,arizona,21,4.050061,2.887911,5.21221
4,colorado,17,1.86126,1.182236,2.540284
5,colorado,18,1.790631,1.131615,2.449646
6,colorado,19,1.58804,0.979629,2.196452
7,colorado,21,1.475842,0.85905,2.092633
8,georgia,17,3.042829,2.538775,3.546883
9,georgia,18,2.890285,2.294654,3.485916


In [87]:
# Rebuild the pooled dropout frame from Arizona, Colorado, Georgia and
# Wisconsin only (Illinois is not part of this concat).
az = pd.read_csv('arizona_dropout.csv')
co = pd.read_csv('colorado_dropout.csv')
ga = pd.read_csv('georgia_dropout.csv')
wi = pd.read_csv('wisconsin_all.csv')

analysis_columns = ['year', 'mergecode', 'state', 'totalenroll', 'droprate',
                    'virtualper', 'hybridper', 'black', 'hispanic',
                    'white', 'lowincome']
data = pd.concat([az, co, ga, wi])[analysis_columns]

# Discard every school whose merge code is on the exclusion list `drop`
# (built in the previous cell), then renumber the rows 0..n-1.
is_excluded = data['mergecode'].isin(drop)
data = data[~is_excluded].reset_index(drop=True)

# dropout change by race
def weighted_median(data, weights):
    """Return the (lower) weighted median of ``data``.

    The result is the smallest observed value whose cumulative weight, taken
    in ascending order of ``data``, reaches at least half of the total weight.

    Parameters
    ----------
    data : array-like
        Observed values (1-D).
    weights : array-like
        Weights matched position-by-position with ``data``; they do not need
        to be normalised.

    Returns
    -------
    scalar
        One of the elements of ``data``.

    Raises
    ------
    ValueError
        If the inputs are empty or differ in shape.
    """
    values = np.asarray(data)
    weight_values = np.asarray(weights)
    if values.shape != weight_values.shape:
        raise ValueError('data and weights must have the same shape')
    if values.size == 0:
        raise ValueError('weighted_median requires at least one observation')

    # Reorder by position, not by index label, so pandas inputs with any index
    # stay correctly paired with their weights.
    order = np.argsort(values)
    sorted_data = values[order]
    cumulative_weights = np.cumsum(weight_values[order])

    # First position where the running weight reaches half of the total.
    # There is deliberately no branch on whether the total weight is a whole
    # number: for normalised weights that test depends on floating-point
    # rounding, and averaging with the element at index - 1 wraps around to
    # the largest value when the index is 0.
    median_index = np.searchsorted(cumulative_weights, 0.5 * cumulative_weights[-1])
    return sorted_data[median_index]
    
def weighted_median_by_state(state, subject, attribute):
    """List the schools at or below one state's weighted median of ``attribute``.

    Reads ``{state}_{subject}.csv``, keeps the rows whose ``year`` equals 21,
    weights each row by its share of the summed ``totalenroll`` and returns
    the unique ``mergecode`` values whose ``attribute`` does not exceed the
    enrollment-weighted median.

    NOTE(review): the exclusion list applied to the analysis frame is not
    applied here, so excluded schools still influence the median - confirm
    this is intended.
    """
    column = f'{attribute}'
    records = pd.read_csv(f'{state}_{subject}.csv')
    latest = records[records['year'] == 21].reset_index()

    # Enrollment share of each school within the state for that year.
    enrollment_share = latest['totalenroll'] / latest['totalenroll'].sum()

    cutoff = weighted_median(latest[column], enrollment_share)
    at_or_below = latest[column] <= cutoff
    return list(latest.loc[at_or_below, 'mergecode'].unique())

# Merge codes of the schools at or below their state's weighted median, one
# list per attribute. Each attribute is paired with the list it fills so the
# two passes below can share one loop body.
low_hispanic, low_black, low_frpm, low_mode = [], [], [], []
targets = (
    ('hispanic', low_hispanic),
    ('black', low_black),
    ('lowincome', low_frpm),
    ('schoolmode', low_mode),
)

# First pass: states whose data lives in '<state>_dropout.csv'.
state = ['arizona', 'colorado', 'georgia']
for attribute, bucket in targets:
    for i in state:
        bucket.extend(weighted_median_by_state(i, 'dropout', attribute))

# Second pass: Wisconsin, whose data lives in 'wisconsin_all.csv'.
state = ['wisconsin']
for attribute, bucket in targets:
    for i in state:
        bucket.extend(weighted_median_by_state(i, 'all', attribute))

In [88]:
# Flag every school as high (1) or low (0) on each attribute. A school is
# "low" when its merge code appears in the matching low_* list.
data['high_black'] = (~data['mergecode'].isin(low_black)).astype('int64')
data['high_his'] = (~data['mergecode'].isin(low_hispanic)).astype('int64')
data['high_frpm'] = (~data['mergecode'].isin(low_frpm)).astype('int64')

# Total enrollment per (high_black, year). Only `totalenroll` is aggregated,
# so the string columns (mergecode, state) are never summed.
black_year_total = (
    data.groupby(['high_black', 'year'])['totalenroll']
        .sum()
        .rename('black_year_total')
        .reset_index()
)

# A row's weight is its share of the enrollment in its (high_black, year) group.
d1 = black_year_total.merge(data, on=['high_black', 'year'])
d1['weight'] = d1['totalenroll'] / d1['black_year_total']
d1.groupby(['high_black', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_black,year,weighted_mean,lower_bound,upper_bound
0,0,17,1.92722,1.430542,2.423898
1,0,18,1.823693,1.323339,2.324047
2,0,19,1.670261,1.224513,2.116008
3,0,21,2.031411,1.512167,2.550655
4,1,17,2.56608,2.09095,3.041211
5,1,18,2.542539,2.034911,3.050167
6,1,19,2.468877,2.002642,2.935112
7,1,21,2.528204,1.998877,3.057532


In [89]:
# Total enrollment per (high_his, year). Only `totalenroll` is aggregated,
# so the string columns (mergecode, state) are never summed.
his_year_total = (
    data.groupby(['high_his', 'year'])['totalenroll']
        .sum()
        .rename('his_year_total')
        .reset_index()
)

# A row's weight is its share of the enrollment in its (high_his, year) group.
d1 = his_year_total.merge(data, on=['high_his', 'year'])
d1['weight'] = d1['totalenroll'] / d1['his_year_total']
d1.groupby(['high_his', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_his,year,weighted_mean,lower_bound,upper_bound
0,0,17,1.751346,1.467635,2.035057
1,0,18,1.690373,1.382841,1.997905
2,0,19,1.577079,1.322191,1.831967
3,0,21,1.761396,1.464075,2.058716
4,1,17,2.776096,2.114827,3.437365
5,1,18,2.70727,2.034021,3.380519
6,1,19,2.591068,1.972815,3.209321
7,1,21,2.833582,2.124967,3.542198


In [90]:
# Total enrollment per (high_frpm, year). Only `totalenroll` is aggregated,
# so the string columns (mergecode, state) are never summed.
frpm_year_total = (
    data.groupby(['high_frpm', 'year'])['totalenroll']
        .sum()
        .rename('frpm_year_total')
        .reset_index()
)

# A row's weight is its share of the enrollment in its (high_frpm, year) group.
d1 = frpm_year_total.merge(data, on=['high_frpm', 'year'])
d1['weight'] = d1['totalenroll'] / d1['frpm_year_total']
d1.groupby(['high_frpm', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_frpm,year,weighted_mean,lower_bound,upper_bound
0,0,17,1.380159,1.164064,1.596255
1,0,18,1.371095,1.121528,1.620662
2,0,19,1.271617,1.056357,1.486877
3,0,21,1.584656,1.312441,1.856871
4,1,17,3.180637,2.604849,3.756425
5,1,18,3.064877,2.478632,3.651122
6,1,19,2.944031,2.414937,3.473124
7,1,21,3.055873,2.453678,3.658068


In [91]:
# dropout change by mode
def weighted_median(data, weights):
    """Return the (lower) weighted median of ``data``.

    Re-defines the helper of the same name from an earlier cell so that this
    cell can run on its own.

    The result is the smallest observed value whose cumulative weight, taken
    in ascending order of ``data``, reaches at least half of the total weight.

    Parameters
    ----------
    data : array-like
        Observed values (1-D).
    weights : array-like
        Weights matched position-by-position with ``data``; they do not need
        to be normalised.

    Returns
    -------
    scalar
        One of the elements of ``data``.

    Raises
    ------
    ValueError
        If the inputs are empty or differ in shape.
    """
    values = np.asarray(data)
    weight_values = np.asarray(weights)
    if values.shape != weight_values.shape:
        raise ValueError('data and weights must have the same shape')
    if values.size == 0:
        raise ValueError('weighted_median requires at least one observation')

    # Reorder by position, not by index label, so pandas inputs with any index
    # stay correctly paired with their weights.
    order = np.argsort(values)
    sorted_data = values[order]
    cumulative_weights = np.cumsum(weight_values[order])

    # First position where the running weight reaches half of the total.
    # There is deliberately no branch on whether the total weight is a whole
    # number: for normalised weights that test depends on floating-point
    # rounding, and averaging with the element at index - 1 wraps around to
    # the largest value when the index is 0.
    median_index = np.searchsorted(cumulative_weights, 0.5 * cumulative_weights[-1])
    return sorted_data[median_index]
    
def weighted_median_by_state(state, subject, attribute):
    """List the schools at or below one state's weighted median of ``attribute``.

    Re-defines the helper of the same name from an earlier cell. Reads
    ``{state}_{subject}.csv``, keeps the rows whose ``year`` equals 21,
    weights each row by its share of the summed ``totalenroll`` and returns
    the unique ``mergecode`` values whose ``attribute`` does not exceed the
    enrollment-weighted median.
    """
    column = f'{attribute}'
    records = pd.read_csv(f'{state}_{subject}.csv')
    latest = records[records['year'] == 21].reset_index()

    # Enrollment share of each school within the state for that year.
    enrollment_share = latest['totalenroll'] / latest['totalenroll'].sum()

    cutoff = weighted_median(latest[column], enrollment_share)
    at_or_below = latest[column] <= cutoff
    return list(latest.loc[at_or_below, 'mergecode'].unique())

# Merge codes of the schools at or below their state's weighted median of the
# virtual and hybrid shares. Each attribute is paired with the list it fills.
low_vir, low_hy = [], []
targets = (
    ('virtualper', low_vir),
    ('hybridper', low_hy),
)

# First pass: states whose data lives in '<state>_dropout.csv'.
state = ['arizona', 'colorado', 'georgia']
for attribute, bucket in targets:
    for i in state:
        bucket.extend(weighted_median_by_state(i, 'dropout', attribute))

# Second pass: Wisconsin, whose data lives in 'wisconsin_all.csv'.
state = ['wisconsin']
for attribute, bucket in targets:
    for i in state:
        bucket.extend(weighted_median_by_state(i, 'all', attribute))
    
# Flag every school as high (1) or low (0) on virtual and hybrid share. A
# school is "low" when its merge code appears in low_vir / low_hy.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Total enrollment per (high_vir, year). Only `totalenroll` is aggregated,
# so the string columns (mergecode, state) are never summed.
vir_year_total = (
    data.groupby(['high_vir', 'year'])['totalenroll']
        .sum()
        .rename('vir_year_total')
        .reset_index()
)

# A row's weight is its share of the enrollment in its (high_vir, year) group.
d1 = vir_year_total.merge(data, on=['high_vir', 'year'])
d1['weight'] = d1['totalenroll'] / d1['vir_year_total']
d1.groupby(['high_vir', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_vir,year,weighted_mean,lower_bound,upper_bound
0,0,17,1.851408,1.509584,2.193233
1,0,18,1.774299,1.41608,2.132518
2,0,19,1.583771,1.2904,1.877141
3,0,21,1.947395,1.606392,2.288397
4,1,17,2.712192,2.043492,3.380893
5,1,18,2.662021,1.978557,3.345485
6,1,19,2.642997,2.004171,3.281824
7,1,21,2.67324,1.934617,3.411862


In [92]:
# Total enrollment per (high_hy, year). Only `totalenroll` is aggregated,
# so the string columns (mergecode, state) are never summed.
hy_year_total = (
    data.groupby(['high_hy', 'year'])['totalenroll']
        .sum()
        .rename('hy_year_total')
        .reset_index()
)

# A row's weight is its share of the enrollment in its (high_hy, year) group.
d1 = hy_year_total.merge(data, on=['high_hy', 'year'])
d1['weight'] = d1['totalenroll'] / d1['hy_year_total']
d1.groupby(['high_hy', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_hy,year,weighted_mean,lower_bound,upper_bound
0,0,17,2.381348,1.873132,2.889564
1,0,18,2.331929,1.81392,2.849938
2,0,19,2.251985,1.776265,2.727705
3,0,21,2.468517,1.920076,3.016958
4,1,17,2.070566,1.677321,2.463812
5,1,18,1.984768,1.563864,2.405672
6,1,19,1.826831,1.475159,2.178503
7,1,21,2.026926,1.625388,2.428464


In [93]:
# colorado dropout change by mode
data = pd.read_csv('colorado_dropout.csv').loc[:, ['year', 'mergecode', 'state', 'totalenroll', 'droprate',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

# Remove the schools on the exclusion list `drop` and renumber the rows 0..n-1.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# 1 = high, 0 = low; "low" schools are those whose merge code is in low_vir / low_hy.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Total enrollment per (high_vir, year). Only `totalenroll` is aggregated,
# so the string columns (mergecode, state) are never summed.
vir_year_total = (
    data.groupby(['high_vir', 'year'])['totalenroll']
        .sum()
        .rename('vir_year_total')
        .reset_index()
)

# A row's weight is its share of the enrollment in its (high_vir, year) group.
d1 = vir_year_total.merge(data, on=['high_vir', 'year'])
d1['weight'] = d1['totalenroll'] / d1['vir_year_total']
d1.groupby(['high_vir', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_vir,year,weighted_mean,lower_bound,upper_bound
0,0,17,1.862431,1.116651,2.608211
1,0,18,1.753937,1.050087,2.457786
2,0,19,1.510893,0.93986,2.081926
3,0,21,1.481269,0.851539,2.110998
4,1,17,1.859469,0.477433,3.241505
5,1,18,1.846332,0.468341,3.224324
6,1,19,1.705238,0.307283,3.103192
7,1,21,1.467535,0.118108,2.816963


In [94]:
# Total enrollment per (high_hy, year). Only `totalenroll` is aggregated,
# so the string columns (mergecode, state) are never summed.
hy_year_total = (
    data.groupby(['high_hy', 'year'])['totalenroll']
        .sum()
        .rename('hy_year_total')
        .reset_index()
)

# A row's weight is its share of the enrollment in its (high_hy, year) group.
d1 = hy_year_total.merge(data, on=['high_hy', 'year'])
d1['weight'] = d1['totalenroll'] / d1['hy_year_total']
d1.groupby(['high_hy', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_hy,year,weighted_mean,lower_bound,upper_bound
0,0,17,2.176854,1.239884,3.113824
1,0,18,2.184595,1.254545,3.114645
2,0,19,1.923961,1.031181,2.81674
3,0,21,1.689985,0.822228,2.557743
4,1,17,1.494201,0.58197,2.406432
5,1,18,1.333538,0.527065,2.140012
6,1,19,1.200257,0.590074,1.810439
7,1,21,1.226187,0.467693,1.984681


In [95]:
# arizona dropout change by mode
data = pd.read_csv('arizona_dropout.csv').loc[:, ['year', 'mergecode', 'state', 'totalenroll', 'droprate',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

# Remove the schools on the exclusion list `drop` and renumber the rows 0..n-1.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# 1 = high, 0 = low; "low" schools are those whose merge code is in low_vir / low_hy.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Total enrollment per (high_vir, year). Only `totalenroll` is aggregated,
# so the string columns (mergecode, state) are never summed.
vir_year_total = (
    data.groupby(['high_vir', 'year'])['totalenroll']
        .sum()
        .rename('vir_year_total')
        .reset_index()
)

# A row's weight is its share of the enrollment in its (high_vir, year) group.
d1 = vir_year_total.merge(data, on=['high_vir', 'year'])
d1['weight'] = d1['totalenroll'] / d1['vir_year_total']
d1.groupby(['high_vir', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_vir,year,weighted_mean,lower_bound,upper_bound
0,0,17,1.825352,0.858849,2.791855
1,0,18,1.923968,0.850362,2.997574
2,0,19,1.82429,0.923338,2.725242
3,0,21,3.237015,2.209118,4.264913
4,1,17,3.076155,1.133329,5.018981
5,1,18,2.943699,0.963108,4.924289
6,1,19,3.415885,1.61027,5.221499
7,1,21,5.093931,3.038152,7.149709


In [96]:
# Total enrollment per (high_hy, year). Only `totalenroll` is aggregated,
# so the string columns (mergecode, state) are never summed.
hy_year_total = (
    data.groupby(['high_hy', 'year'])['totalenroll']
        .sum()
        .rename('hy_year_total')
        .reset_index()
)

# A row's weight is its share of the enrollment in its (high_hy, year) group.
d1 = hy_year_total.merge(data, on=['high_hy', 'year'])
d1['weight'] = d1['totalenroll'] / d1['hy_year_total']
d1.groupby(['high_hy', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_hy,year,weighted_mean,lower_bound,upper_bound
0,0,17,2.86054,1.278528,4.442552
1,0,18,2.833408,1.260061,4.406755
2,0,19,3.133298,1.689427,4.577169
3,0,21,4.821065,3.160909,6.481222
4,1,17,1.757185,0.645011,2.86936
5,1,18,1.774629,0.378536,3.170721
6,1,19,1.73058,0.569895,2.891264
7,1,21,3.034608,1.818293,4.250923


In [97]:
# wisconsin dropout change by mode
data = pd.read_csv('wisconsin_all.csv').loc[:, ['year', 'mergecode', 'state', 'totalenroll', 'droprate',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

# Remove the schools on the exclusion list `drop` and renumber the rows 0..n-1.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# 1 = high, 0 = low; "low" schools are those whose merge code is in low_vir / low_hy.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Total enrollment per (high_vir, year). Only `totalenroll` is aggregated,
# so the string columns (mergecode, state) are never summed.
vir_year_total = (
    data.groupby(['high_vir', 'year'])['totalenroll']
        .sum()
        .rename('vir_year_total')
        .reset_index()
)

# A row's weight is its share of the enrollment in its (high_vir, year) group.
d1 = vir_year_total.merge(data, on=['high_vir', 'year'])
d1['weight'] = d1['totalenroll'] / d1['vir_year_total']
d1.groupby(['high_vir', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_vir,year,weighted_mean,lower_bound,upper_bound
0,0,17,0.775646,0.608224,0.943067
1,0,18,0.785388,0.587014,0.983762
2,0,19,0.71732,0.583856,0.850784
3,0,21,0.594751,0.400746,0.788756
4,1,17,1.686909,1.31836,2.055458
5,1,18,1.695739,1.37525,2.016228
6,1,19,1.465143,1.169025,1.761261
7,1,21,1.223141,0.920861,1.525422


In [98]:
# Total enrollment per (high_hy, year). Only `totalenroll` is aggregated,
# so the string columns (mergecode, state) are never summed.
hy_year_total = (
    data.groupby(['high_hy', 'year'])['totalenroll']
        .sum()
        .rename('hy_year_total')
        .reset_index()
)

# A row's weight is its share of the enrollment in its (high_hy, year) group.
d1 = hy_year_total.merge(data, on=['high_hy', 'year'])
d1['weight'] = d1['totalenroll'] / d1['hy_year_total']
d1.groupby(['high_hy', 'year']).apply(weight_ci).reset_index()

Unnamed: 0,high_hy,year,weighted_mean,lower_bound,upper_bound
0,0,17,1.111564,0.861842,1.361287
1,0,18,1.085057,0.857046,1.313069
2,0,19,1.006081,0.807512,1.204649
3,0,21,0.825611,0.61356,1.037662
4,1,17,1.307976,1.055529,1.560423
5,1,18,1.354898,1.077055,1.63274
6,1,19,1.134136,0.928432,1.33984
7,1,21,0.964324,0.688831,1.239817
