In [7]:
import pandas as pd
import scipy.stats as stats
import numpy as np

def cramersV(chi_2, n, k, r):
    """Return the bias-corrected Cramér's V for a contingency table.

    The chi-square statistic is turned into phi^2 = chi_2 / n, and both
    phi^2 and the table dimensions are shrunk by terms in 1 / (n - 1)
    before the usual V = sqrt(phi^2 / min(k - 1, r - 1)) is formed.

    Args:
        chi_2: Chi-square statistic of the table.
        n: Total number of observations in the table.
        k: Number of columns of the table.
        r: Number of rows of the table.

    Returns:
        The corrected V (>= 0), or the sentinel -1 when V is undefined:
        fewer than two observations, or a corrected dimension that
        leaves no positive degrees of freedom (e.g. a table with a
        single row or column).
    """
    # With n <= 1 every correction term divides by n - 1 <= 0.
    if n <= 1:
        return -1

    n_minus_1 = n - 1
    phi_2 = chi_2 / n
    # Clamp at zero: the correction may push a small phi^2 negative.
    phi_2corr = max(0, phi_2 - ((k - 1) * (r - 1)) / n_minus_1)
    rcorr = r - ((r - 1) ** 2) / n_minus_1
    kcorr = k - ((k - 1) ** 2) / n_minus_1

    denominator = min(kcorr - 1, rcorr - 1)
    # A zero denominator means a degenerate table; a negative one (k or r
    # larger than n) would otherwise yield sqrt of a negative / -0.0 ratio.
    if denominator <= 0:
        return -1
    return np.sqrt(phi_2corr / denominator)
    
df = pd.read_csv('dat.csv')

# Combined death column: row-wise maximum of the three death indicators,
# so it is non-zero as soon as any of the follow-up windows is non-zero.
death_columns = ['death.within.28.days', 'death.within.3.months', 'death.within.6.months']
df['death'] = df[death_columns].max(axis=1)

# Normal range bounds for creatinine (inclusive on both ends)
lower = 44
upper = 110

# 1 = abnormal, 0 = normal, NaN = not measured.
# `between` is False for NaN, so a plain negation would label every missing
# measurement as abnormal; keep those rows as NaN instead so they drop out
# of later cross-tabulations rather than inflating the abnormal group.
creatinine = df['creatinine.enzymatic.method']
df['creatinine_abnormal'] = (
    (~creatinine.between(lower, upper)).astype(float).where(creatinine.notna())
)

# Normal range bounds for sodium (inclusive on both ends)
lower = 137
upper = 147

# 1 = abnormal, 0 = normal, NaN = not measured (same rule as creatinine)
sodium = df['sodium']
df['sodium_abnormal'] = (
    (~sodium.between(lower, upper)).astype(float).where(sodium.notna())
)

# Cross-tabulate each risk factor (rows) against the combined death
# indicator (columns); these tables feed the chi-square tests.
death_outcome = df['death']
chronic_kidney_disease = df['moderate.to.severe.chronic.kidney.disease']
creatinine_flag = df['creatinine_abnormal']

# Acute renal failure vs. death
contingency_table_renal = pd.crosstab(df['acute.renal.failure'], death_outcome)
# Moderate to severe chronic kidney disease vs. death
contingency_table_kidney = pd.crosstab(chronic_kidney_disease, death_outcome)
# Abnormal creatinine vs. death
contingency_table_creatinine = pd.crosstab(creatinine_flag, death_outcome)
# Abnormal sodium vs. death
contingency_table_sodium = pd.crosstab(df['sodium_abnormal'], death_outcome)
# Control: leukemia is 0 for every row, so this table has a single row
contingency_table_control = pd.crosstab(df['leukemia'], death_outcome)

# Association between the two kidney-related factors themselves
contingency_table_kidney_creatinine = pd.crosstab(chronic_kidney_disease, creatinine_flag)

# Show the factor-vs-death tables, each followed by a blank line.
for shown_table in (
    contingency_table_renal,
    contingency_table_kidney,
    contingency_table_creatinine,
    contingency_table_sodium,
    contingency_table_control,
):
    print(shown_table, '\n')

# Chi-square tests and Cramér's V for every contingency table.
# NOTE(review): chi2_contingency is called with its defaults, which apply
# Yates' continuity correction to 2x2 tables - confirm that the corrected
# statistic is the one intended as input to Cramér's V.

def _chi2_with_cramers_v(table):
    """Run the chi-square test on `table` and append its Cramér's V.

    Returns (chi2, p, dof, expected, v), where the first four values come
    from stats.chi2_contingency and v from cramersV, called with the
    table's grand total, column count and row count.
    """
    statistic, p_value, degrees, expected_counts = stats.chi2_contingency(table)
    n_rows, n_cols = table.shape
    total = table.sum().sum()
    return statistic, p_value, degrees, expected_counts, cramersV(statistic, total, n_cols, n_rows)


(chi2_renal, p_renal, dof_renal,
 expected_renal, cramer_renal) = _chi2_with_cramers_v(contingency_table_renal)
(chi2_kidney, p_kidney, dof_kidney,
 expected_kidney, cramer_kidney) = _chi2_with_cramers_v(contingency_table_kidney)
(chi2_creatinine, p_creatinine, dof_creatinine,
 expected_creatinine, cramer_creatinine) = _chi2_with_cramers_v(contingency_table_creatinine)
(chi2_sodium, p_sodium, dof_sodium,
 expected_sodium, cramer_sodium) = _chi2_with_cramers_v(contingency_table_sodium)
(chi2_control, p_control, dof_control,
 expected_control, cramer_control) = _chi2_with_cramers_v(contingency_table_control)

# Chronic kidney disease vs. abnormal creatinine (factor-vs-factor table)
chi2, p, dof, expected, cramer_last = _chi2_with_cramers_v(contingency_table_kidney_creatinine)
# Row and column counts of the last table analysed.
r, k = contingency_table_kidney_creatinine.shape

# Report Cramér's V (strength of association) for each factor vs. death.
# These are effect sizes, not p-values: the p-values of the chi-square
# tests are held in the p_* variables and are not printed here.
# A value of -1 is the sentinel cramersV returns for a degenerate table.
cramer_report = (
    ("Cramer's V test for acute renal failure:", cramer_renal),
    ("Cramer's V test for moderate to severe kidney disease:", cramer_kidney),
    ("Cramer's V test for creatinine:", cramer_creatinine),
    ("Cramer's V test for sodium:", cramer_sodium),
    ("Cramer's V test for leukemia:", cramer_control),
)
for caption, cramer_value in cramer_report:
    print(caption, cramer_value)
# Cramér's V between chronic kidney disease and abnormal creatinine
print(cramer_last)


death                   0   1
acute.renal.failure          
0                    1946  55
1                       5   2 

death                                         0   1
moderate.to.severe.chronic.kidney.disease          
0.0                                        1499  33
1.0                                         450  24 

death                   0   1
creatinine_abnormal          
0                    1260  25
1                     691  32 

death               0   1
sodium_abnormal          
0                1322  27
1                 629  30 

death        0   1
leukemia          
0         1951  57 

Cramer's V test for acute renal failure: 0.06234568426986609
Cramer's V test for moderate to severe kidney disease: 0.06725337576782989
Cramer's V test for creatinine: 0.06485297959889354
Cramer's V test for sodium: 0.0652312929949093
Cramer's V test for leukemia: -1
0.4320283805735213
