In [6]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr

In [18]:
def pairwise_cor(data, col1, col2):
    df = len(data) - 2
    standard_error = 1 / np.sqrt(df)
    margin_of_error = 1.96 * standard_error
    coef, p_value = pearsonr(data[col1], data[col2])
    lower_bound = coef - margin_of_error
    upper_bound = coef + margin_of_error
    print("Correlation for:", col1, col2)
    print("Correlation coefficient:", coef)
    print("P-value:", p_value)
    print("95% Confidence Interval: [{}, {}]".format(lower_bound, upper_bound))

# Math Proficiency Rate

In [19]:
drop = pd.read_csv('math_drop.csv').math_drop.to_list()
in_math = pd.read_csv('indiana_mathpass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_math, il, wi]).loc[:, ['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]
data['year'] = data['year'].astype('str')

data = data.loc[~data['mergecode'].isin(drop), :].reset_index().drop(columns = 'index')
data = data[data['year'] == '21']

pairwise_cor(data, 'black', 'virtualper')
pairwise_cor(data, 'black', 'hybridper')
pairwise_cor(data, 'hispanic', 'virtualper')
pairwise_cor(data, 'hispanic', 'hybridper')
pairwise_cor(data, 'lowincome', 'virtualper')
pairwise_cor(data, 'lowincome', 'hybridper')

Correlation for: black virtualper
Correlation coefficient: 0.6142517607774214
P-value: 2.1774219603356488e-132
95% Confidence Interval: [0.5591660169001258, 0.669337504654717]
Correlation for: black hybridper
Correlation coefficient: 0.028737962746147525
P-value: 0.30652932483738554
95% Confidence Interval: [-0.02634778113114806, 0.0838237066234431]
Correlation for: hispanic virtualper
Correlation coefficient: 0.5089536930093206
P-value: 1.680148697889769e-84
95% Confidence Interval: [0.453867949132025, 0.5640394368866162]
Correlation for: hispanic hybridper
Correlation coefficient: 0.042441513688547566
P-value: 0.13091858674040052
95% Confidence Interval: [-0.012644230188748017, 0.09752725756584316]
Correlation for: lowincome virtualper
Correlation coefficient: 0.5230728323802093
P-value: 5.669061131537828e-90
95% Confidence Interval: [0.4679870885029137, 0.5781585762575049]
Correlation for: lowincome hybridper
Correlation coefficient: -0.0007555531582081167
P-value: 0.978557078676572

# ELA Proficiency Rate

In [20]:
drop = pd.read_csv('ela_inf_remove.csv').ela_drop.to_list()
in_ela = pd.read_csv('indiana_elapass.csv')
il = pd.read_csv('illinois_all.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([in_ela, il, wi]).loc[:, ['year', 'mergecode', 'state', 'totaltest', 'mathpass',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]
data['year'] = data['year'].astype('str')

data = data.loc[~data['mergecode'].isin(drop), :].reset_index().drop(columns = 'index')
data = data[data['year'] == '21']

pairwise_cor(data, 'black', 'virtualper')
pairwise_cor(data, 'black', 'hybridper')
pairwise_cor(data, 'hispanic', 'virtualper')
pairwise_cor(data, 'hispanic', 'hybridper')
pairwise_cor(data, 'lowincome', 'virtualper')
pairwise_cor(data, 'lowincome', 'hybridper')

Correlation for: black virtualper
Correlation coefficient: 0.595176258445419
P-value: 4.911718443320368e-127
95% Confidence Interval: [0.5411265699977251, 0.6492259468931129]
Correlation for: black hybridper
Correlation coefficient: 0.006620275726282778
P-value: 0.8103088753083838
95% Confidence Interval: [-0.04742941272141112, 0.06066996417397668]
Correlation for: hispanic virtualper
Correlation coefficient: 0.5359653485741352
P-value: 8.224737800960214e-99
95% Confidence Interval: [0.4819156601264413, 0.5900150370218291]
Correlation for: hispanic hybridper
Correlation coefficient: 0.02629333408135847
P-value: 0.3403586324684938
95% Confidence Interval: [-0.02775635436633543, 0.08034302252905237]
Correlation for: lowincome virtualper
Correlation coefficient: 0.5128806182321658
P-value: 2.973751421660141e-89
95% Confidence Interval: [0.4588309297844719, 0.5669303066798597]
Correlation for: lowincome hybridper
Correlation coefficient: -0.04148506071159548
P-value: 0.13239309377095362
95

# Dropout Rate

In [21]:
drop = pd.read_csv('drop_inf_remove.csv').drop_drop.to_list() + ['644-0500georgia',
 '4412-90822arizona',
 '4403-6264arizona',
 '4320-90159arizona',
 '4221-90064arizona',
 '4196-92913arizona',
 '79874-78813arizona',
 '1010-5146colorado',
 '0870-2155colorado',
 '772-0110georgia',
 '4431-5877arizona']
az = pd.read_csv('arizona_dropout.csv')
co = pd.read_csv('colorado_dropout.csv')
ga = pd.read_csv('georgia_dropout.csv')
wi = pd.read_csv('wisconsin_all.csv')
data = pd.concat([az, co, ga, wi]).loc[:, ['year', 'mergecode', 'state', 'totalenroll', 'droprate',
                                            'virtualper', 'hybridper', 'black', 'hispanic',
                                            'white', 'lowincome']]

data = data.loc[~data['mergecode'].isin(drop), :].reset_index().drop(columns = 'index')
data['year'] = data['year'].astype('str')
data = data[data['year'] == '21']

pairwise_cor(data, 'black', 'virtualper')
pairwise_cor(data, 'black', 'hybridper')
pairwise_cor(data, 'hispanic', 'virtualper')
pairwise_cor(data, 'hispanic', 'hybridper')
pairwise_cor(data, 'lowincome', 'virtualper')
pairwise_cor(data, 'lowincome', 'hybridper')

Correlation for: black virtualper
Correlation coefficient: 0.4369616731457225
P-value: 2.897908209425087e-60
95% Confidence Interval: [0.3818759292684269, 0.4920474170230181]
Correlation for: black hybridper
Correlation coefficient: -0.1346953249053259
P-value: 1.4824660521242653e-06
95% Confidence Interval: [-0.1897810687826215, -0.07960958102803031]
Correlation for: hispanic virtualper
Correlation coefficient: 0.39415341743701887
P-value: 2.1741235421379714e-48
95% Confidence Interval: [0.3390676735597233, 0.44923916131431446]
Correlation for: hispanic hybridper
Correlation coefficient: 0.17085265154701607
P-value: 9.185134793719629e-10
95% Confidence Interval: [0.11576690766972048, 0.22593839542431166]
Correlation for: lowincome virtualper
Correlation coefficient: 0.4130987653647204
P-value: 1.966968916530988e-53
95% Confidence Interval: [0.3580130214874248, 0.468184509242016]
Correlation for: lowincome hybridper
Correlation coefficient: -0.0670479139315365
P-value: 0.01694767467641