## Question: 
Calculate the relationship between stability within the health insurance system (use member months) and its relationship to chronic disease rate 

## Analysis: 
Logistic regression with binary coded chronic disease as the outcome of interest and independent variables that includes member months as one of the predictors controlling for demographic characteristics 

In [193]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [146]:
import pandas as pd
from scipy.stats import chi2
import warnings
warnings.filterwarnings("ignore")
med = pd.read_csv('../../data/reshaped_med.csv')
enroll = pd.read_csv('../../data/enroll.csv')
enroll.rename(columns={'Member ID Encrypted':'member_id'}, inplace=True)
enroll.rename(columns={'Incurred Year':'year'}, inplace=True)
enroll.rename(columns={'Total Medical Member Months':'month'}, inplace=True)
enroll.rename(columns={'Age':'age'}, inplace=True)
data = pd.merge(med, enroll, on=['member_id', 'year', 'age'], how='left')
data = data[data['year'] == 2016]
#list(data)

def regression(b_name, a_name):
    df = data[[a_name, b_name]]
    k = df[b_name].nunique()
    N = df[b_name].count()
    df['rank'] = df[a_name].rank(ascending=True)
    table1 = df.groupby(b_name).sum()
    table2 = df.groupby(b_name).count()
    
    sub_component = 0
    for i in range(0, k):
        sub_component = (table1.iloc[i, 1] ** 2 / table2.iloc[i, 1]) + sub_component

    test_statistic = ((12 / (N * (N + 1))) * sub_component) - 3 * (N + 1)

    degrees_of_freedom = k - 1
    a = 0.05 / 100
    chi_critical_value = round(chi2.isf(q=a, df=degrees_of_freedom), 2)
    p_value = chi2.sf(test_statistic, degrees_of_freedom)

    print('Rejection Criteria:')
    
    print('Reject null hypothesis at', 0.05, '% level of significance '
            'if Test Statistic is greater than or equal ')
    
    print('chi_critical_value:', chi_critical_value)

    table = {'Variable': a_name + ' grouped by ' + b_name, 'Test Statistic': round(test_statistic,4),
               'Critical Value': chi_critical_value,'P value': p_value}
    print(table)
    
print('read in data')

read in data


### Check correlation between asthma and month

In [147]:
regression('month', 'binary_asthma')

Rejection Criteria:
Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal 
chi_critical_value: 33.14
{'Variable': 'binary_asthma grouped by month', 'Test Statistic': 267.7984, 'Critical Value': 33.14, 'P value': 5.1842044370864567e-51}


### Check correlation between bp and month

In [86]:
regression('month', 'binary_bp')
regression('month', 'claims_bp')


 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  12.12 .

 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  41.31 .


### Check correlation between cancer and month

In [87]:
regression('month', 'binary_cancer')
regression('month', 'claims_cancer')


 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  12.12 .

 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  57.86 .


### Check correlation between cardiovascular and month

In [88]:
regression('month', 'binary_cardiovascular')
regression('month', 'claims_cardiovascular')


 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  12.12 .

 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  54.95 .


### Check correlation between cholesterol and month

In [89]:
regression('month', 'binary_cholesterol')
regression('month', 'claims_cholesterol')


 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  12.12 .

 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  34.82 .


### Check correlation between copd and month

In [90]:
regression('month','binary_copd')
regression('month', 'claims_copd')


 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  12.12 .

 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  34.82 .


### Check correlation between diabetes and month

In [91]:
regression('month', 'binary_diabetes')
regression('month', 'claims_diabetes')


 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  12.12 .

 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  50.51 .


### Check correlation between kidney and month

In [92]:
regression('month', 'binary_kidney')
regression('month', 'claims_kidney')


 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  12.12 .

 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  45.97 .


### Check correlation between mental_disorder and month

In [93]:
regression('month', 'binary_mental_disorder')
regression('month', 'claims_mental_disorder')


 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  12.12 .

 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  24.1 .


### Check correlation between musculoskeletal and month

In [94]:
regression('month', 'binary_musculoskeletal')
regression('month', 'claims_musculoskeletal')


 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  12.12 .

 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  26.02 .


### Check correlation between obesity and month

In [95]:
regression('month', 'binary_obesity')
regression('month', 'claims_obesity')


 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  12.12 .

 Rejection Criteria: Reject null hypothesis at 0.05 % level of significance if Test Statistic is greater than or equal  41.31 .
