# Contingency tables

In [1]:
import pandas as pd
import numpy as np

In [15]:
# Axis labels shared by both tables: rows are treatment groups, columns are outcomes.
row_names = ['treated', 'not_treated']
col_names = ['survived', 'died']

# Observed counts, one 2x2 contingency table per sex.
# Each inner list is one treatment group: [survived, died].
males_df = pd.DataFrame(
    data=[[50, 60], [30, 40]],
    index=row_names,
    columns=col_names,
)
females_df = pd.DataFrame(
    data=[[60, 30], [90, 50]],
    index=row_names,
    columns=col_names,
)

In [16]:
# Display the observed counts for males (rows: treatment group, columns: outcome).
males_df

Unnamed: 0,survived,died
treated,50,60
not_treated,30,40


In [17]:
# Display the observed counts for females (rows: treatment group, columns: outcome).
females_df

Unnamed: 0,survived,died
treated,60,30
not_treated,90,50


## Perform Chi-Square test of independence

Determine whether treatment and outcome (i.e. survived or died) are independent.

- $H_0$: treatment and outcome are independent
- $H_1$: treatment and outcome are dependent

If the p-value is less than 0.05, we reject the null hypothesis and conclude that treatment and outcome are dependent.



In [20]:
import scipy.stats as stats

# Run the chi-square test of independence separately on each sex's table.
# NOTE: for 2x2 tables (one degree of freedom) chi2_contingency applies
# Yates' continuity correction by default, so the statistic is slightly
# smaller than the uncorrected Pearson chi-square.
males_chi_square_result, females_chi_square_result = (
    stats.chi2_contingency(table) for table in (males_df, females_df)
)


In [21]:
# Print the results

def print_stats(chi_square_result, alpha=0.05):
    """Print a readable summary of a chi-square test of independence.

    Parameters
    ----------
    chi_square_result : sequence
        Indexable result whose first four items are, in order, the test
        statistic, the p-value, the degrees of freedom and the expected
        frequencies (the layout this notebook obtains from
        ``scipy.stats.chi2_contingency``).
    alpha : float, optional
        Significance level the p-value is compared against. The null
        hypothesis is rejected when ``p_value < alpha``. Defaults to 0.05,
        the threshold used throughout this notebook.

    Returns
    -------
    None
        The summary is written to standard output.
    """
    separator = '------------------------'

    # Read each field once so the printed values and the decision below
    # are guaranteed to refer to the same numbers.
    chi_square_value = chi_square_result[0]
    p_value = chi_square_result[1]
    degrees_of_freedom = chi_square_result[2]
    expected_frequencies = chi_square_result[3]

    print(separator)
    print('Chi-square value: ', chi_square_value)
    if p_value < alpha:
        print('p-value: ', p_value, ' (statistically significant)')
        print('Reject the null hypothesis')
    else:
        print('p-value: ', p_value, ' (not statistically significant)')
        print('Fail to reject the null hypothesis')

    print('Degree of freedom: ', degrees_of_freedom)
    print('Expected frequencies: ', expected_frequencies)
    print(separator)
    
# Report the male result first, then the female result, with a blank
# line separating the two summaries.
for position, result in enumerate((males_chi_square_result, females_chi_square_result)):
    if position > 0:
        print('')
    print_stats(result)

------------------------
Chi-square value:  0.03535714285714309
p-value:  0.8508492527705047  (not statistically significant)
Fail to reject the null hypothesis
Degree of freedom:  1
Expected frequencies:  [[48.88888889 61.11111111]
 [31.11111111 38.88888889]]
------------------------

------------------------
Chi-square value:  0.0520618386243384
p-value:  0.8195135892996177  (not statistically significant)
Fail to reject the null hypothesis
Degree of freedom:  1
Expected frequencies:  [[58.69565217 31.30434783]
 [91.30434783 48.69565217]]
------------------------


## Combine the contingency tables for males and females

In [22]:
# Pool the two strata: the element-wise sum of the male and female tables
# gives a single treatment-by-outcome table that disregards sex.
df = males_df + females_df

df

Unnamed: 0,survived,died
treated,110,90
not_treated,120,90


In [23]:
# Chi-square test of independence on the pooled (male + female) table.
pooled_chi_square_result = stats.chi2_contingency(df)
print_stats(pooled_chi_square_result)

------------------------
Chi-square value:  0.11389478375891327
p-value:  0.7357528158315947  (not statistically significant)
Fail to reject the null hypothesis
Degree of freedom:  1
Expected frequencies:  [[112.19512195  87.80487805]
 [117.80487805  92.19512195]]
------------------------
