<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Chi-Square-Test" data-toc-modified-id="Chi-Square-Test-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Chi-Square Test</a></span></li><li><span><a href="#Chi-Squared-from-Scratch" data-toc-modified-id="Chi-Squared-from-Scratch-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Chi-Squared from Scratch</a></span></li></ul></div>

# Chi-Square Test
Ref: https://machinelearningmastery.com/chi-squared-test-for-machine-learning/


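$\chi_{c}^{2}=\sum \frac{\left(O_{i}-E_{i}\right)^{2}}{E_{i}}$

where $O_{i}$ is the observed count in cell $i$ of the contingency table, $E_{i}$ is the count expected in that cell if the two variables are independent, the sum runs over all cells, and the subscript $c$ denotes the degrees of freedom.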

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import os,sys,time
import scipy
import statsmodels

from scipy import stats
from scipy.stats import ttest_1samp
from scipy.stats import ttest_ind # independent means two samples.
from statsmodels.stats import weightstats as stests # stests.ztest

# Fixed seed constant (not referenced by the cells visible here).
SEED = 100
# Use the fully qualified option key. The abbreviated 'max_columns' is
# resolved by pattern matching and becomes ambiguous on newer pandas
# releases (it also matches 'styler.render.max_columns'), which raises
# OptionError. 'display.max_columns' works on old and new versions alike.
pd.set_option('display.max_columns', 100)
# Select plotly as the backend used by pandas plotting.
pd.set_option('plotting.backend','plotly') # matplotlib, bokeh, altair, plotly
# Load the watermark extension, then print the versions of the imported
# modules (the list shown in this cell's output).
%load_ext watermark
%watermark -iv

statsmodels 0.12.0
seaborn     0.11.0
numpy       1.18.4
autopep8    1.5.2
pandas      1.1.0
scipy       1.4.1
json        2.0.9



In [10]:
# Chi-square test on a 2x3 contingency table of observed counts.
table = [[10, 20, 30],
         [6,  9,  17]]
print('contingency table=\n', table)


# chi2_contingency returns the test statistic, its p-value, the degrees of
# freedom, and the table of expected counts.
stat, p, dof, expected = stats.chi2_contingency(table)
print('dof=%d' % dof)
print('expected values=\n',expected)
print()

# Significance level is defined once; the confidence level is derived from
# it instead of being hard-coded separately and then used to overwrite alpha.
alpha = 0.05
prob = 1.0 - alpha

# interpret test-statistic
# The critical value is the `prob` quantile of the chi-square distribution
# with `dof` degrees of freedom. The statistic is a sum of squared terms
# divided by positive expected counts, so it is non-negative and needs no abs().
critical = stats.chi2.ppf(prob, dof)
print('probability=%.3f, critical=%.3f, stat=%.3f' % (prob, critical, stat))
print()

if stat >= critical:
    print('Dependent (reject H0)')
else:
    print('Independent (fail to reject H0)')
print()

# interpret p-value
# Equivalent decision rule: reject H0 when the p-value is at most alpha.
print('significance=%.3f, p=%.3f' % (alpha, p))
if p <= alpha:
    print('Dependent (reject H0)')
else:
    print('Independent (fail to reject H0)')

contingency table=
 [[10, 20, 30], [6, 9, 17]]
dof=2
expected values=
 [[10.43478261 18.91304348 30.65217391]
 [ 5.56521739 10.08695652 16.34782609]]

probability=0.950, critical=5.991, stat=0.272

Independent (fail to reject H0)

significance=0.050, p=0.873
Independent (fail to reject H0)
