In [30]:
import pandas as pd
import numpy as np

# Math Proficiency Rates

In [31]:
# load final dataset
# Pool Indiana math results with the Illinois and Wisconsin files, keep only the
# analysis columns, and remove every school whose mergecode is in math_drop.csv.
drop = pd.read_csv('math_drop.csv').math_drop.to_list()

keep_cols = ['year', 'mergecode', 'state', 'totaltest', 'mathpass',
             'virtualper', 'hybridper', 'black', 'hispanic',
             'white', 'lowincome']

in_math = pd.read_csv('indiana_mathpass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')

data = pd.concat([in_math, il, wi])[keep_cols]
data = data[~data['mergecode'].isin(drop)].reset_index(drop=True)

In [32]:
# math change by state
# Test-weighted mean pass rate per (state, year): each row is weighted by its
# share of `totaltest` within its state and year.
# The needed column is selected *before* summing so the string columns
# (`mergecode`, `state`) are never aggregated.
state_year_total = (
    data.groupby(['state', 'year'])['totaltest']
    .sum()
    .rename('state_year_total')
    .reset_index()
)
d1 = state_year_total.merge(data, on=['state', 'year'])
d1['weight'] = d1['totaltest'] / d1['state_year_total']
d1['mathpass'] = d1['mathpass'] * d1['weight']
d1 = d1.groupby(['state', 'year'])[['mathpass']].sum().reset_index()
d1.pivot(index='year', columns='state', values='mathpass').to_csv('math_change.csv')

In [33]:
# math change by race
def weighted_median(data, weights):
    """Return the weighted median of ``data``.

    The result is the smallest value whose cumulative weight (after sorting by
    value) reaches half of the total weight. It is always an element of
    ``data``; no averaging of neighbours is performed, so the answer does not
    depend on floating-point rounding of the weight total.

    Parameters
    ----------
    data : array-like
        One-dimensional values (list, numpy array or pandas Series).
    weights : array-like
        Non-negative weights, positionally aligned with ``data``. They need not
        be normalised. Missing (NaN) weights are treated as zero weight.

    Returns
    -------
    scalar
        The element of ``data`` at the weighted median position.

    Raises
    ------
    ValueError
        If the inputs are empty, not one-dimensional, or differ in length.
    """
    values = np.asarray(data)
    w = np.asarray(weights, dtype=float)

    if values.ndim != 1 or w.ndim != 1 or values.shape != w.shape:
        raise ValueError('data and weights must be one-dimensional and of equal length')
    if values.size == 0:
        raise ValueError('weighted_median requires at least one observation')

    # A NaN weight would poison every later cumulative sum; count it as zero.
    w = np.where(np.isnan(w), 0.0, w)

    # Sort positionally so the result is independent of any pandas index labels.
    order = np.argsort(values, kind='stable')
    sorted_values = values[order]
    cumulative = np.cumsum(w[order])

    # First position whose cumulative weight is >= half of the total.
    position = int(np.searchsorted(cumulative, 0.5 * cumulative[-1]))
    position = min(position, sorted_values.size - 1)
    return sorted_values[position]
    
def weighted_median_by_state(state, subject, attribute, year=21):
    """List the schools at or below a state's enrollment-weighted median.

    Reads ``{state}_{subject}.csv``, keeps the rows of one year, weights each
    row by its share of ``totalenroll`` and computes the weighted median of
    ``attribute`` with ``weighted_median``.

    Parameters
    ----------
    state : str
        File-name prefix, e.g. ``'indiana'``.
    subject : str
        File-name suffix, e.g. ``'mathpass'`` or ``'all'``.
    attribute : str
        Column whose weighted median defines the cut-off.
    year : int, default 21
        Value of the ``year`` column used as the reference year
        (presumably 2021 -- confirm against the data files).

    Returns
    -------
    list
        Unique ``mergecode`` values whose ``attribute`` is <= the weighted median.
    """
    schools = pd.read_csv(f'{state}_{subject}.csv')
    # A fresh 0..n-1 index keeps values and weights positionally aligned.
    schools = schools[schools['year'] == year].reset_index(drop=True)

    weights = schools['totalenroll'] / schools['totalenroll'].sum()
    cutoff = weighted_median(schools[attribute], weights)

    at_or_below = schools[attribute] <= cutoff
    return list(schools.loc[at_or_below, 'mergecode'].unique())

# For each attribute, collect the mergecodes of schools at or below their own
# state's weighted median. Indiana is read from its math file, Illinois and
# Wisconsin from their "all" files; list order is Indiana, Illinois, Wisconsin.
low_hispanic, low_black, low_frpm, low_mode = [], [], [], []
targets = {
    'hispanic': low_hispanic,
    'black': low_black,
    'lowincome': low_frpm,
    'schoolmode': low_mode,
}

state = ['indiana']
for i in state:
    for attribute, bucket in targets.items():
        bucket.extend(weighted_median_by_state(i, 'mathpass', attribute))

state = ['illinois', 'wisconsin']
for i in state:
    for attribute, bucket in targets.items():
        bucket.extend(weighted_median_by_state(i, 'all', attribute))

In [34]:
# Reload the pooled math dataset so this cell does not depend on earlier
# in-place changes to `data`.
drop = pd.read_csv('math_drop.csv').math_drop.to_list()
in_math = pd.read_csv('indiana_mathpass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_math, il, wi]).loc[:, ['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

rate_col, size_col = 'mathpass', 'totaltest'
# (flag column, mergecodes at/below the median, label for flag 0, label for flag 1)
group_specs = [
    ('high_black', low_black, 'Low Black', 'High Black'),
    ('high_his', low_hispanic, 'Low Hispanic', 'High Hispanic'),
    ('high_frpm', low_frpm, 'Low ED', 'High ED'),
]

tables = []
for flag, low_codes, low_label, high_label in group_specs:
    # 0 = school is in the low list, 1 = every other school
    data[flag] = (~data['mergecode'].isin(low_codes)).astype(int)
    keys = [flag, 'year']
    # Each row's share of `size_col` within its (flag, year) group
    share = data[size_col] / data.groupby(keys)[size_col].transform('sum')
    # Weighted mean = sum of rate * share; only the needed column is summed
    rates = (
        data.assign(**{rate_col: data[rate_col] * share})
        .groupby(keys)[rate_col]
        .sum()
        .reset_index()
    )
    tables.append(
        rates.pivot(index='year', columns=flag, values=rate_col)
        .reset_index()
        .rename(columns={0: low_label, 1: high_label})
    )

black, his, frpm = tables
black.merge(his, on='year').merge(frpm, on='year').set_index('year').to_csv('math_change_attribute.csv')

In [35]:
# math change by mode
# Mergecodes of schools at or below their state's weighted median share of
# virtual / hybrid instruction (Indiana from its math file, others from "all").
low_vir, low_hy = [], []
targets = {'virtualper': low_vir, 'hybridper': low_hy}

state = ['indiana']
for i in state:
    for attribute, bucket in targets.items():
        bucket.extend(weighted_median_by_state(i, 'mathpass', attribute))

state = ['illinois', 'wisconsin']
for i in state:
    for attribute, bucket in targets.items():
        bucket.extend(weighted_median_by_state(i, 'all', attribute))

# Reload the pooled math dataset
drop = pd.read_csv('math_drop.csv').math_drop.to_list()
in_math = pd.read_csv('indiana_mathpass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_math, il, wi]).loc[:, ['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

rate_col, size_col = 'mathpass', 'totaltest'
# (flag column, mergecodes at/below the median, label for flag 0, label for flag 1)
group_specs = [
    ('high_vir', low_vir, 'Low Virtual', 'High Virtual'),
    ('high_hy', low_hy, 'Low Hybrid', 'High Hybrid'),
]

tables = []
for flag, low_codes, low_label, high_label in group_specs:
    # 0 = school is in the low list, 1 = every other school
    data[flag] = (~data['mergecode'].isin(low_codes)).astype(int)
    keys = [flag, 'year']
    # Each row's share of `size_col` within its (flag, year) group
    share = data[size_col] / data.groupby(keys)[size_col].transform('sum')
    # Weighted mean = sum of rate * share; only the needed column is summed
    rates = (
        data.assign(**{rate_col: data[rate_col] * share})
        .groupby(keys)[rate_col]
        .sum()
        .reset_index()
    )
    tables.append(
        rates.pivot(index='year', columns=flag, values=rate_col)
        .reset_index()
        .rename(columns={0: low_label, 1: high_label})
    )

vir, hy = tables
vir.merge(hy, on='year').set_index('year').to_csv('math_change_mode.csv')

In [36]:
# Indiana only: math pass rate by instruction mode.
# Uses `drop`, `low_vir` and `low_hy` as left by the pooled "math change by mode" cell.
data = pd.read_csv('indiana_mathpass.csv').loc[:, ['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

rate_col, size_col = 'mathpass', 'totaltest'
# (flag column, mergecodes at/below the median, label for flag 0, label for flag 1)
group_specs = [
    ('high_vir', low_vir, 'Low Virtual', 'High Virtual'),
    ('high_hy', low_hy, 'Low Hybrid', 'High Hybrid'),
]

tables = []
for flag, low_codes, low_label, high_label in group_specs:
    # 0 = school is in the low list, 1 = every other school
    data[flag] = (~data['mergecode'].isin(low_codes)).astype(int)
    keys = [flag, 'year']
    # Each row's share of `size_col` within its (flag, year) group
    share = data[size_col] / data.groupby(keys)[size_col].transform('sum')
    # Weighted mean = sum of rate * share; only the needed column is summed
    rates = (
        data.assign(**{rate_col: data[rate_col] * share})
        .groupby(keys)[rate_col]
        .sum()
        .reset_index()
    )
    tables.append(
        rates.pivot(index='year', columns=flag, values=rate_col)
        .reset_index()
        .rename(columns={0: low_label, 1: high_label})
    )

vir, hy = tables
vir.merge(hy, on='year').set_index('year').to_csv('math_change_mode_in.csv')

In [37]:
# Illinois only: math pass rate by instruction mode.
# Uses `drop`, `low_vir` and `low_hy` as left by the pooled "math change by mode" cell.
data = pd.read_csv('illinois_all.csv').loc[:, ['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

rate_col, size_col = 'mathpass', 'totaltest'
# (flag column, mergecodes at/below the median, label for flag 0, label for flag 1)
group_specs = [
    ('high_vir', low_vir, 'Low Virtual', 'High Virtual'),
    ('high_hy', low_hy, 'Low Hybrid', 'High Hybrid'),
]

tables = []
for flag, low_codes, low_label, high_label in group_specs:
    # 0 = school is in the low list, 1 = every other school
    data[flag] = (~data['mergecode'].isin(low_codes)).astype(int)
    keys = [flag, 'year']
    # Each row's share of `size_col` within its (flag, year) group
    share = data[size_col] / data.groupby(keys)[size_col].transform('sum')
    # Weighted mean = sum of rate * share; only the needed column is summed
    rates = (
        data.assign(**{rate_col: data[rate_col] * share})
        .groupby(keys)[rate_col]
        .sum()
        .reset_index()
    )
    tables.append(
        rates.pivot(index='year', columns=flag, values=rate_col)
        .reset_index()
        .rename(columns={0: low_label, 1: high_label})
    )

vir, hy = tables
vir.merge(hy, on='year').set_index('year').to_csv('math_change_mode_il.csv')

In [38]:
# Wisconsin only: math pass rate by instruction mode.
# Uses `drop`, `low_vir` and `low_hy` as left by the pooled "math change by mode" cell.
data = pd.read_csv('wisconsin_all.csv').loc[:, ['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

rate_col, size_col = 'mathpass', 'totaltest'
# (flag column, mergecodes at/below the median, label for flag 0, label for flag 1)
group_specs = [
    ('high_vir', low_vir, 'Low Virtual', 'High Virtual'),
    ('high_hy', low_hy, 'Low Hybrid', 'High Hybrid'),
]

tables = []
for flag, low_codes, low_label, high_label in group_specs:
    # 0 = school is in the low list, 1 = every other school
    data[flag] = (~data['mergecode'].isin(low_codes)).astype(int)
    keys = [flag, 'year']
    # Each row's share of `size_col` within its (flag, year) group
    share = data[size_col] / data.groupby(keys)[size_col].transform('sum')
    # Weighted mean = sum of rate * share; only the needed column is summed
    rates = (
        data.assign(**{rate_col: data[rate_col] * share})
        .groupby(keys)[rate_col]
        .sum()
        .reset_index()
    )
    tables.append(
        rates.pivot(index='year', columns=flag, values=rate_col)
        .reset_index()
        .rename(columns={0: low_label, 1: high_label})
    )

vir, hy = tables
vir.merge(hy, on='year').set_index('year').to_csv('math_change_mode_wi.csv')

# ELA Proficiency Rates

In [39]:
# load final dataset
# Pool Indiana ELA results with the Illinois and Wisconsin files, keep only the
# analysis columns, and remove every school whose mergecode is in ela_inf_remove.csv.
drop = pd.read_csv('ela_inf_remove.csv').ela_drop.to_list()

keep_cols = ['year', 'mergecode', 'state', 'totaltest', 'elapass',
             'virtualper', 'hybridper', 'black', 'hispanic',
             'white', 'lowincome']

in_ela = pd.read_csv('indiana_elapass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')

data = pd.concat([in_ela, il, wi])[keep_cols]
data = data[~data['mergecode'].isin(drop)].reset_index(drop=True)

In [40]:
# ela change by state
# Test-weighted mean pass rate per (state, year): each row is weighted by its
# share of `totaltest` within its state and year.
# The needed column is selected *before* summing so the string columns
# (`mergecode`, `state`) are never aggregated.
state_year_total = (
    data.groupby(['state', 'year'])['totaltest']
    .sum()
    .rename('state_year_total')
    .reset_index()
)
d1 = state_year_total.merge(data, on=['state', 'year'])
d1['weight'] = d1['totaltest'] / d1['state_year_total']
d1['elapass'] = d1['elapass'] * d1['weight']
d1 = d1.groupby(['state', 'year'])[['elapass']].sum().reset_index()
d1.pivot(index='year', columns='state', values='elapass').to_csv('ela_change.csv')

In [41]:
# ela change by race
# For each attribute, collect the mergecodes of schools at or below their own
# state's weighted median. Indiana is read from its ELA file, Illinois and
# Wisconsin from their "all" files; list order is Indiana, Illinois, Wisconsin.
low_hispanic, low_black, low_frpm, low_mode = [], [], [], []
targets = {
    'hispanic': low_hispanic,
    'black': low_black,
    'lowincome': low_frpm,
    'schoolmode': low_mode,
}

state = ['indiana']
for i in state:
    for attribute, bucket in targets.items():
        bucket.extend(weighted_median_by_state(i, 'elapass', attribute))

state = ['illinois', 'wisconsin']
for i in state:
    for attribute, bucket in targets.items():
        bucket.extend(weighted_median_by_state(i, 'all', attribute))

In [42]:
# ELA pass rate by school attribute, computed on the pooled ELA `data` frame
# built by the ELA load cell.
rate_col, size_col = 'elapass', 'totaltest'
# (flag column, mergecodes at/below the median, label for flag 0, label for flag 1)
group_specs = [
    ('high_black', low_black, 'Low Black', 'High Black'),
    ('high_his', low_hispanic, 'Low Hispanic', 'High Hispanic'),
    ('high_frpm', low_frpm, 'Low ED', 'High ED'),
]

tables = []
for flag, low_codes, low_label, high_label in group_specs:
    # 0 = school is in the low list, 1 = every other school
    data[flag] = (~data['mergecode'].isin(low_codes)).astype(int)
    keys = [flag, 'year']
    # Each row's share of `size_col` within its (flag, year) group
    share = data[size_col] / data.groupby(keys)[size_col].transform('sum')
    # Weighted mean = sum of rate * share; only the needed column is summed
    rates = (
        data.assign(**{rate_col: data[rate_col] * share})
        .groupby(keys)[rate_col]
        .sum()
        .reset_index()
    )
    tables.append(
        rates.pivot(index='year', columns=flag, values=rate_col)
        .reset_index()
        .rename(columns={0: low_label, 1: high_label})
    )

black, his, frpm = tables
black.merge(his, on='year').merge(frpm, on='year').set_index('year').to_csv('ela_change_attribute.csv')

In [43]:
# ela change by mode
# Mergecodes of schools at or below their state's weighted median share of
# virtual / hybrid instruction (Indiana from its ELA file, others from "all").
low_vir, low_hy = [], []
targets = {'virtualper': low_vir, 'hybridper': low_hy}

state = ['indiana']
for i in state:
    for attribute, bucket in targets.items():
        bucket.extend(weighted_median_by_state(i, 'elapass', attribute))

state = ['illinois', 'wisconsin']
for i in state:
    for attribute, bucket in targets.items():
        bucket.extend(weighted_median_by_state(i, 'all', attribute))

# Operates on the pooled ELA `data` frame built by the ELA load cell.
rate_col, size_col = 'elapass', 'totaltest'
# (flag column, mergecodes at/below the median, label for flag 0, label for flag 1)
group_specs = [
    ('high_vir', low_vir, 'Low Virtual', 'High Virtual'),
    ('high_hy', low_hy, 'Low Hybrid', 'High Hybrid'),
]

tables = []
for flag, low_codes, low_label, high_label in group_specs:
    # 0 = school is in the low list, 1 = every other school
    data[flag] = (~data['mergecode'].isin(low_codes)).astype(int)
    keys = [flag, 'year']
    # Each row's share of `size_col` within its (flag, year) group
    share = data[size_col] / data.groupby(keys)[size_col].transform('sum')
    # Weighted mean = sum of rate * share; only the needed column is summed
    rates = (
        data.assign(**{rate_col: data[rate_col] * share})
        .groupby(keys)[rate_col]
        .sum()
        .reset_index()
    )
    tables.append(
        rates.pivot(index='year', columns=flag, values=rate_col)
        .reset_index()
        .rename(columns={0: low_label, 1: high_label})
    )

vir, hy = tables
vir.merge(hy, on='year').set_index('year').to_csv('ela_change_mode.csv')

In [44]:
# Indiana only: ELA pass rate by instruction mode.
# Uses `drop`, `low_vir` and `low_hy` as left by the earlier ELA cells.
data = pd.read_csv('indiana_elapass.csv').loc[:, ['year', 'mergecode', 'state', 'totaltest', 'elapass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

rate_col, size_col = 'elapass', 'totaltest'
# (flag column, mergecodes at/below the median, label for flag 0, label for flag 1)
group_specs = [
    ('high_vir', low_vir, 'Low Virtual', 'High Virtual'),
    ('high_hy', low_hy, 'Low Hybrid', 'High Hybrid'),
]

tables = []
for flag, low_codes, low_label, high_label in group_specs:
    # 0 = school is in the low list, 1 = every other school
    data[flag] = (~data['mergecode'].isin(low_codes)).astype(int)
    keys = [flag, 'year']
    # Each row's share of `size_col` within its (flag, year) group
    share = data[size_col] / data.groupby(keys)[size_col].transform('sum')
    # Weighted mean = sum of rate * share; only the needed column is summed
    rates = (
        data.assign(**{rate_col: data[rate_col] * share})
        .groupby(keys)[rate_col]
        .sum()
        .reset_index()
    )
    tables.append(
        rates.pivot(index='year', columns=flag, values=rate_col)
        .reset_index()
        .rename(columns={0: low_label, 1: high_label})
    )

vir, hy = tables
vir.merge(hy, on='year').set_index('year').to_csv('ela_change_mode_in.csv')

In [45]:
# Illinois only: ELA pass rate by instruction mode.
# Uses `drop`, `low_vir` and `low_hy` as left by the earlier ELA cells.
data = pd.read_csv('illinois_all.csv').loc[:, ['year', 'mergecode', 'state', 'totaltest', 'elapass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

rate_col, size_col = 'elapass', 'totaltest'
# (flag column, mergecodes at/below the median, label for flag 0, label for flag 1)
group_specs = [
    ('high_vir', low_vir, 'Low Virtual', 'High Virtual'),
    ('high_hy', low_hy, 'Low Hybrid', 'High Hybrid'),
]

tables = []
for flag, low_codes, low_label, high_label in group_specs:
    # Every school gets an explicit 0/1 flag (0 = in the low list, 1 = otherwise).
    # A flag left as NaN would be dropped by groupby and its series would
    # silently vanish from the output.
    data[flag] = (~data['mergecode'].isin(low_codes)).astype(int)
    keys = [flag, 'year']
    # Each row's share of `size_col` within its (flag, year) group
    share = data[size_col] / data.groupby(keys)[size_col].transform('sum')
    # Weighted mean = sum of rate * share; only the needed column is summed
    rates = (
        data.assign(**{rate_col: data[rate_col] * share})
        .groupby(keys)[rate_col]
        .sum()
        .reset_index()
    )
    tables.append(
        rates.pivot(index='year', columns=flag, values=rate_col)
        .reset_index()
        .rename(columns={0: low_label, 1: high_label})
    )

vir, hy = tables
vir.merge(hy, on='year').set_index('year').to_csv('ela_change_mode_il.csv')

In [46]:
# Wisconsin only: ELA pass rate by instruction mode.
# Uses `drop`, `low_vir` and `low_hy` as left by the earlier ELA cells.
data = pd.read_csv('wisconsin_all.csv').loc[:, ['year', 'mergecode', 'state', 'totaltest', 'elapass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# Keep Wisconsin rows only; copy so the flag columns are written to an
# independent frame rather than to a slice of the filtered one.
data = data[data['state'] == 'wisconsin'].copy()

rate_col, size_col = 'elapass', 'totaltest'
# (flag column, mergecodes at/below the median, label for flag 0, label for flag 1)
group_specs = [
    ('high_vir', low_vir, 'Low Virtual', 'High Virtual'),
    ('high_hy', low_hy, 'Low Hybrid', 'High Hybrid'),
]

tables = []
for flag, low_codes, low_label, high_label in group_specs:
    # 0 = school is in the low list, 1 = every other school
    data[flag] = (~data['mergecode'].isin(low_codes)).astype(int)
    keys = [flag, 'year']
    # Each row's share of `size_col` within its (flag, year) group
    share = data[size_col] / data.groupby(keys)[size_col].transform('sum')
    # Weighted mean = sum of rate * share; only the needed column is summed
    rates = (
        data.assign(**{rate_col: data[rate_col] * share})
        .groupby(keys)[rate_col]
        .sum()
        .reset_index()
    )
    tables.append(
        rates.pivot(index='year', columns=flag, values=rate_col)
        .reset_index()
        .rename(columns={0: low_label, 1: high_label})
    )

vir, hy = tables
vir.merge(hy, on='year').set_index('year').to_csv('ela_change_mode_wi.csv')

# Dropout Rates

In [47]:
# Schools excluded from the dropout analysis: the codes listed in
# drop_inf_remove.csv plus a hand-maintained list of extra mergecodes.
extra_drop = [
    '644-0500georgia', '4412-90822arizona', '4403-6264arizona',
    '4320-90159arizona', '4221-90064arizona', '4196-92913arizona',
    '79874-78813arizona', '1010-5146colorado', '0870-2155colorado',
    '772-0110georgia', '4431-5877arizona',
]
drop = pd.read_csv('drop_inf_remove.csv').drop_drop.to_list() + extra_drop

keep_cols = ['year', 'mergecode', 'state', 'totalenroll', 'droprate',
             'virtualper', 'hybridper', 'black', 'hispanic',
             'white', 'lowincome']

az = pd.read_csv('arizona_dropout.csv')
co = pd.read_csv('colorado_dropout.csv')
ga = pd.read_csv('georgia_dropout.csv')
wi = pd.read_csv('wisconsin_all.csv')

# Pool the four states, keep the analysis columns, remove excluded schools
data = pd.concat([az, co, ga, wi])[keep_cols]
data = data[~data['mergecode'].isin(drop)].reset_index(drop=True)

In [20]:
# drop change by race
# For each attribute, collect the mergecodes of schools at or below their own
# state's weighted median. Arizona, Colorado and Georgia are read from their
# dropout files, Wisconsin from its "all" file; list order follows that sequence.
low_hispanic, low_black, low_frpm, low_mode = [], [], [], []
targets = {
    'hispanic': low_hispanic,
    'black': low_black,
    'lowincome': low_frpm,
    'schoolmode': low_mode,
}

state = ['arizona', 'colorado', 'georgia']
for attribute, bucket in targets.items():
    for i in state:
        bucket.extend(weighted_median_by_state(i, 'dropout', attribute))

state = ['wisconsin']
for attribute, bucket in targets.items():
    for i in state:
        bucket.extend(weighted_median_by_state(i, 'all', attribute))

In [21]:
# Dropout rate by school attribute, computed on the pooled dropout `data`
# frame built by the dropout load cell. Rows are weighted by `totalenroll`.
rate_col, size_col = 'droprate', 'totalenroll'
# (flag column, mergecodes at/below the median, label for flag 0, label for flag 1)
group_specs = [
    ('high_black', low_black, 'Low Black', 'High Black'),
    ('high_his', low_hispanic, 'Low Hispanic', 'High Hispanic'),
    ('high_frpm', low_frpm, 'Low ED', 'High ED'),
]

tables = []
for flag, low_codes, low_label, high_label in group_specs:
    # 0 = school is in the low list, 1 = every other school
    data[flag] = (~data['mergecode'].isin(low_codes)).astype(int)
    keys = [flag, 'year']
    # Each row's share of `size_col` within its (flag, year) group
    share = data[size_col] / data.groupby(keys)[size_col].transform('sum')
    # Weighted mean = sum of rate * share; only the needed column is summed
    rates = (
        data.assign(**{rate_col: data[rate_col] * share})
        .groupby(keys)[rate_col]
        .sum()
        .reset_index()
    )
    tables.append(
        rates.pivot(index='year', columns=flag, values=rate_col)
        .reset_index()
        .rename(columns={0: low_label, 1: high_label})
    )

black, his, frpm = tables
black.merge(his, on='year').merge(frpm, on='year').set_index('year').to_csv('drop_change_attribute.csv')

In [23]:
# drop change by mode
# Relies on `data` and `weighted_median_by_state` from earlier cells.

# (state, file suffix) pairs: the first three states are read from '<state>_dropout.csv',
# Wisconsin from 'wisconsin_all.csv'.
state_sources = [('arizona', 'dropout'), ('colorado', 'dropout'), ('georgia', 'dropout'),
                 ('wisconsin', 'all')]

# District codes that weighted_median_by_state returns for each state, pooled across states.
low_vir = [code
           for name, subject in state_sources
           for code in weighted_median_by_state(name, subject, 'virtualper')]
low_hy = [code
          for name, subject in state_sources
          for code in weighted_median_by_state(name, subject, 'hybridper')]

# Flag is 0 for districts listed in low_vir / low_hy and 1 for every other district.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Enrollment-weighted dropout rate per (flag, year); virtual and hybrid share one recipe.
mode_tables = []
for flag, labels in (('high_vir', {0: 'Low Virtual', 1: 'High Virtual'}),
                     ('high_hy', {0: 'Low Hybrid', 1: 'High Hybrid'})):
    keys = [flag, 'year']
    # Aggregate only `totalenroll`: a bare groupby().sum() would first try to sum every
    # other column as well, including any non-numeric ones, which is slow and can raise
    # on pandas >= 2.0.
    group_enroll = data.groupby(keys)['totalenroll'].transform('sum')
    # Each row contributes droprate * (its share of the group's enrollment).
    # NOTE(review): a row with a missing droprate still adds its enrollment to the group
    # total, which pulls that group's rate toward 0 - confirm this is intended.
    weighted = data[keys].assign(droprate=data['droprate'] * (data['totalenroll'] / group_enroll))
    rates = weighted.groupby(keys)['droprate'].sum().reset_index()
    # One row per year, one column per flag value (0 -> "Low ...", 1 -> "High ...").
    mode_tables.append(
        rates.pivot(index='year', columns=flag, values='droprate').reset_index().rename(columns=labels))

vir, hy = mode_tables

vir.merge(hy, on='year').set_index('year').to_csv('drop_change_mode.csv')

In [24]:
# Colorado only: enrollment-weighted dropout rate by year, split by the virtual and hybrid flags.
# Relies on `drop`, `low_vir` and `low_hy` defined in earlier cells.
data = pd.read_csv('colorado_dropout.csv').loc[:, ['year', 'mergecode', 'state', 'totalenroll', 'droprate',
                                                   'virtualper', 'hybridper', 'black', 'hispanic',
                                                   'white', 'lowincome']]

# Remove the districts listed in `drop` and renumber the rows from 0.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# Flag is 0 for districts listed in low_vir / low_hy and 1 for every other district.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Virtual and hybrid share one recipe, so they run through the same loop.
mode_tables = []
for flag, labels in (('high_vir', {0: 'Low Virtual', 1: 'High Virtual'}),
                     ('high_hy', {0: 'Low Hybrid', 1: 'High Hybrid'})):
    keys = [flag, 'year']
    # Aggregate only `totalenroll`: a bare groupby().sum() would first try to sum every
    # other column as well, including any non-numeric ones, which is slow and can raise
    # on pandas >= 2.0.
    group_enroll = data.groupby(keys)['totalenroll'].transform('sum')
    # Each row contributes droprate * (its share of the group's enrollment).
    # NOTE(review): a row with a missing droprate still adds its enrollment to the group
    # total, which pulls that group's rate toward 0 - confirm this is intended.
    weighted = data[keys].assign(droprate=data['droprate'] * (data['totalenroll'] / group_enroll))
    rates = weighted.groupby(keys)['droprate'].sum().reset_index()
    # One row per year, one column per flag value (0 -> "Low ...", 1 -> "High ...").
    mode_tables.append(
        rates.pivot(index='year', columns=flag, values='droprate').reset_index().rename(columns=labels))

vir, hy = mode_tables

vir.merge(hy, on='year').set_index('year').to_csv('drop_change_mode_co.csv')

In [25]:
# Arizona only: enrollment-weighted dropout rate by year, split by the virtual and hybrid flags.
# Relies on `drop`, `low_vir` and `low_hy` defined in earlier cells.
data = pd.read_csv('arizona_dropout.csv').loc[:, ['year', 'mergecode', 'state', 'totalenroll', 'droprate',
                                                  'virtualper', 'hybridper', 'black', 'hispanic',
                                                  'white', 'lowincome']]

# Remove the districts listed in `drop` and renumber the rows from 0.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# Flag is 0 for districts listed in low_vir / low_hy and 1 for every other district.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Virtual and hybrid share one recipe, so they run through the same loop.
mode_tables = []
for flag, labels in (('high_vir', {0: 'Low Virtual', 1: 'High Virtual'}),
                     ('high_hy', {0: 'Low Hybrid', 1: 'High Hybrid'})):
    keys = [flag, 'year']
    # Aggregate only `totalenroll`: a bare groupby().sum() would first try to sum every
    # other column as well, including any non-numeric ones, which is slow and can raise
    # on pandas >= 2.0.
    group_enroll = data.groupby(keys)['totalenroll'].transform('sum')
    # Each row contributes droprate * (its share of the group's enrollment).
    # NOTE(review): a row with a missing droprate still adds its enrollment to the group
    # total, which pulls that group's rate toward 0 - confirm this is intended.
    weighted = data[keys].assign(droprate=data['droprate'] * (data['totalenroll'] / group_enroll))
    rates = weighted.groupby(keys)['droprate'].sum().reset_index()
    # One row per year, one column per flag value (0 -> "Low ...", 1 -> "High ...").
    mode_tables.append(
        rates.pivot(index='year', columns=flag, values='droprate').reset_index().rename(columns=labels))

vir, hy = mode_tables

vir.merge(hy, on='year').set_index('year').to_csv('drop_change_mode_az.csv')

In [26]:
# Georgia only: enrollment-weighted dropout rate by year, split by the virtual and hybrid flags.
# Relies on `drop`, `low_vir` and `low_hy` defined in earlier cells.
data = pd.read_csv('georgia_dropout.csv').loc[:, ['year', 'mergecode', 'state', 'totalenroll', 'droprate',
                                                  'virtualper', 'hybridper', 'black', 'hispanic',
                                                  'white', 'lowincome']]

# Remove the districts listed in `drop` and renumber the rows from 0.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# Flag is 0 for districts listed in low_vir / low_hy and 1 for every other district.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Virtual and hybrid share one recipe, so they run through the same loop.
mode_tables = []
for flag, labels in (('high_vir', {0: 'Low Virtual', 1: 'High Virtual'}),
                     ('high_hy', {0: 'Low Hybrid', 1: 'High Hybrid'})):
    keys = [flag, 'year']
    # Aggregate only `totalenroll`: a bare groupby().sum() would first try to sum every
    # other column as well, including any non-numeric ones, which is slow and can raise
    # on pandas >= 2.0.
    group_enroll = data.groupby(keys)['totalenroll'].transform('sum')
    # Each row contributes droprate * (its share of the group's enrollment).
    # NOTE(review): a row with a missing droprate still adds its enrollment to the group
    # total, which pulls that group's rate toward 0 - confirm this is intended.
    weighted = data[keys].assign(droprate=data['droprate'] * (data['totalenroll'] / group_enroll))
    rates = weighted.groupby(keys)['droprate'].sum().reset_index()
    # One row per year, one column per flag value (0 -> "Low ...", 1 -> "High ...").
    mode_tables.append(
        rates.pivot(index='year', columns=flag, values='droprate').reset_index().rename(columns=labels))

vir, hy = mode_tables

vir.merge(hy, on='year').set_index('year').to_csv('drop_change_mode_ga.csv')

In [27]:
# Wisconsin only: enrollment-weighted dropout rate by year, split by the virtual and hybrid flags.
# Relies on `drop`, `low_vir` and `low_hy` defined in earlier cells.
data = pd.read_csv('wisconsin_all.csv').loc[:, ['year', 'mergecode', 'state', 'totalenroll', 'droprate',
                                                'virtualper', 'hybridper', 'black', 'hispanic',
                                                'white', 'lowincome']]

# Remove the districts listed in `drop` and renumber the rows from 0.
data = data.loc[~data['mergecode'].isin(drop), :].reset_index(drop=True)

# Flag is 0 for districts listed in low_vir / low_hy and 1 for every other district.
data['high_vir'] = (~data['mergecode'].isin(low_vir)).astype('int64')
data['high_hy'] = (~data['mergecode'].isin(low_hy)).astype('int64')

# Virtual and hybrid share one recipe, so they run through the same loop.
mode_tables = []
for flag, labels in (('high_vir', {0: 'Low Virtual', 1: 'High Virtual'}),
                     ('high_hy', {0: 'Low Hybrid', 1: 'High Hybrid'})):
    keys = [flag, 'year']
    # Aggregate only `totalenroll`: a bare groupby().sum() would first try to sum every
    # other column as well, including any non-numeric ones, which is slow and can raise
    # on pandas >= 2.0.
    group_enroll = data.groupby(keys)['totalenroll'].transform('sum')
    # Each row contributes droprate * (its share of the group's enrollment).
    # NOTE(review): a row with a missing droprate still adds its enrollment to the group
    # total, which pulls that group's rate toward 0 - confirm this is intended.
    weighted = data[keys].assign(droprate=data['droprate'] * (data['totalenroll'] / group_enroll))
    rates = weighted.groupby(keys)['droprate'].sum().reset_index()
    # One row per year, one column per flag value (0 -> "Low ...", 1 -> "High ...").
    mode_tables.append(
        rates.pivot(index='year', columns=flag, values='droprate').reset_index().rename(columns=labels))

vir, hy = mode_tables

vir.merge(hy, on='year').set_index('year').to_csv('drop_change_mode_wi.csv')