# Perform a Chi-squared test of independence

## Libraries and settings

In [3]:
# Libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import chi2
from scipy.stats import chi2_contingency

# Silence all warnings so the cell outputs below stay uncluttered
import warnings
warnings.filterwarnings(action='ignore')

# Show the directory this notebook is executed from
print(os.getcwd())

/workspaces/data_analytics/GK/Week_07


## Contingency table

In [7]:
# Contingency table with the number of men and women buying
# different types of pets (rows: gender, columns: pet type):
#
#            dog  cat  bird
#    men     207  282  241
#    women   234  242  232

tab = np.array([
    [207, 282, 241],  # men
    [234, 242, 232],  # women
])
tab

array([[207, 282, 241],
       [234, 242, 232]])

## Perform a Chi-square test on the contingency table

In [8]:
# Null and alternative hypotheses
# H0: There is no association [relationship] between gender and type of pet
# H1: There is an association [relationship] between gender and type of pet

# Significance level
alpha = 0.05

# Confidence level (1 - alpha); derived from alpha so that the critical
# value computed below always matches the chosen significance level
prob = 1 - alpha

# Chi-squared test of independence on the observed counts; unpacks the
# test statistic, the p-value, the degrees of freedom and the table of
# expected frequencies
stat, p, dof, expected = chi2_contingency(tab)

# Critical value: quantile of the chi-squared distribution with 'dof'
# degrees of freedom at probability 'prob'; a test statistic above this
# value leads to rejecting H0 at significance level alpha
critical = chi2.ppf(prob, dof)

# Print results of the Chi-squared test
print('Expected frequencies:', '\n', expected.round(4), '\n')
print('Critical value:', critical.round(4), '\n')
print('Test statistic:', stat.round(4), '\n')
print('Degrees of freedom:', dof, '\n')
print(f'p-value: {p:.4f}')

Expected frequencies: 
 [[223.8734 266.0083 240.1182]
 [217.1266 257.9917 232.8818]] 

Critical value: 5.9915 

Test statistic: 4.5422 

Degrees of freedom: 2 

p-value: 0.1032


## Interpretation of test result

In [None]:
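# Because the p-value of the Chi-squared test (0.1032) is larger than our significance level alpha (0.05), we cannot reject the null hypothesis.
# Equivalently, the test statistic (4.5422) is smaller than the critical value (5.9915).
# The data therefore provide no statistically significant evidence of a relationship between gender and type of pet bought
# (failing to reject H0 does not prove that no relationship exists).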