## Question: 
Calculate the relationship between stability within the health insurance system (use member months) and its relationship to chronic disease rate 

## Analysis: 
Logistic regression with binary coded chronic disease as the outcome of interest and independent variables that includes member months as one of the predictors controlling for demographic characteristics 

In [162]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [163]:
import pandas as pd
from scipy.stats import chi2
import warnings
warnings.filterwarnings("ignore")
med = pd.read_csv('../../data/reshaped_med.csv')
enroll = pd.read_csv('../../data/enroll.csv')
enroll.rename(columns={'Member ID Encrypted':'member_id'}, inplace=True)
enroll.rename(columns={'Incurred Year':'year'}, inplace=True)
enroll.rename(columns={'Total Medical Member Months':'month'}, inplace=True)
enroll.rename(columns={'Age':'age'}, inplace=True)
data = pd.merge(med, enroll, on=['member_id', 'year', 'age'], how='left')
data = data[data['year'] == 2016]
#list(data)

#Kruskall-Wallis test
def regression(b_name, a_name):
    df = data[[a_name, b_name]]
    k = df[b_name].nunique()
    N = df[b_name].count()
    df['rank'] = df[a_name].rank(ascending=True)
    table1 = df.groupby(b_name).sum()
    table2 = df.groupby(b_name).count()
    
    sub_component = 0
    for i in range(0, k):
        sub_component = (table1.iloc[i, 1] ** 2 / table2.iloc[i, 1]) + sub_component

    test_statistic = ((12 / (N * (N + 1))) * sub_component) - 3 * (N + 1)

    degrees_of_freedom = k - 1
    a = 0.05 / 100
    chi_critical_value = round(chi2.isf(q=a, df=degrees_of_freedom), 2)
    p_value = chi2.sf(test_statistic, degrees_of_freedom)

    print('Rejection Criteria:')
    
    print('Reject null hypothesis at', 0.05, '% level of significance '
            'if Test Statistic is greater than or equal ')
    
    print('chi_critical_value:', chi_critical_value)

    table = {'Variable': a_name + ' grouped by ' + b_name, 'Test Statistic': round(test_statistic,4),
               'Critical Value': chi_critical_value,'P value': p_value}
    print(table)
    
print('read in data')

read in data


### Check correlation between asthma and month

In [164]:
regression('month', 'binary_asthma')

Rejection Criteria:
Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal 
chi_critical_value: 33.14
{'Variable': 'binary_asthma grouped by month', 'Test Statistic': 665.6706, 'Critical Value': 33.14, 'P value': 1.2261560029277442e-135}


### Check correlation between bp and month

In [165]:
regression('month', 'binary_bp')

Rejection Criteria:
Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal 
chi_critical_value: 33.14
{'Variable': 'binary_bp grouped by month', 'Test Statistic': 760.6756, 'Critical Value': 33.14, 'P value': 5.229307286984232e-156}


### Check correlation between cancer and month

In [166]:
regression('month', 'binary_cancer')

Rejection Criteria:
Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal 
chi_critical_value: 33.14
{'Variable': 'binary_cancer grouped by month', 'Test Statistic': 639.3735, 'Critical Value': 33.14, 'P value': 5.2523668056349975e-130}


### Check correlation between cardiovascular and month

In [167]:
regression('month', 'binary_cardiovascular')

Rejection Criteria:
Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal 
chi_critical_value: 33.14
{'Variable': 'binary_cardiovascular grouped by month', 'Test Statistic': 680.5151, 'Critical Value': 33.14, 'P value': 8.092065702669175e-139}


### Check correlation between cholesterol and month

In [168]:
regression('month', 'binary_cholesterol')

Rejection Criteria:
Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal 
chi_critical_value: 33.14
{'Variable': 'binary_cholesterol grouped by month', 'Test Statistic': 766.1709, 'Critical Value': 33.14, 'P value': 3.460878152406013e-157}


### Check correlation between copd and month

In [169]:
regression('month','binary_copd')

Rejection Criteria:
Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal 
chi_critical_value: 33.14
{'Variable': 'binary_copd grouped by month', 'Test Statistic': 656.4104, 'Critical Value': 33.14, 'P value': 1.180480553680066e-133}


### Check correlation between diabetes and month

In [170]:
regression('month', 'binary_diabetes')

Rejection Criteria:
Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal 
chi_critical_value: 33.14
{'Variable': 'binary_diabetes grouped by month', 'Test Statistic': 724.7373, 'Critical Value': 33.14, 'P value': 2.6793976813034046e-148}


### Check correlation between kidney and month

In [171]:
regression('month', 'binary_kidney')

Rejection Criteria:
Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal 
chi_critical_value: 33.14
{'Variable': 'binary_kidney grouped by month', 'Test Statistic': 643.3233, 'Critical Value': 33.14, 'P value': 7.493179469354042e-131}


### Check correlation between mental_disorder and month

In [172]:
regression('month', 'binary_mental_disorder')

Rejection Criteria:
Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal 
chi_critical_value: 33.14
{'Variable': 'binary_mental_disorder grouped by month', 'Test Statistic': 664.8255, 'Critical Value': 33.14, 'P value': 1.8602462426197466e-135}


### Check correlation between musculoskeletal and month

In [173]:
regression('month', 'binary_musculoskeletal')

Rejection Criteria:
Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal 
chi_critical_value: 33.14
{'Variable': 'binary_musculoskeletal grouped by month', 'Test Statistic': 702.8754, 'Critical Value': 33.14, 'P value': 1.3049495733977208e-143}


### Check correlation between obesity and month

In [174]:
regression('month', 'binary_obesity')

Rejection Criteria:
Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal 
chi_critical_value: 33.14
{'Variable': 'binary_obesity grouped by month', 'Test Statistic': 722.7319, 'Critical Value': 33.14, 'P value': 7.212726130458747e-148}
