In [1]:
import statsmodels.stats.weightstats as statsM
from scipy import stats
import numpy as np
import pandas as pd

### Chi-Squared Goodness-Of-Fit Test

In [4]:
# Experiment 1:
## n = 279
rolls_expected = [46.5, 46.5, 46.5, 46.5, 46.5, 46.5]
rolls_actual =  [59, 63, 37, 38, 32, 50]
stat, p_val = stats.chisquare(rolls_actual, rolls_expected)
print(p_val)

0.003651257113910144


### Chi-Squared Test of Independence

In [None]:
First check the problem statement:
    Data:
        Democrat      Republican(not Democrat)
Black
White

H_0: P(Black) = P(White)
H_a: P(Black) != P(White)

In [14]:
### Data
voter_race = np.random.choice(a= ['black','white'],
                             p = [0.4, 0.6],
                             size = 1000)

voter_party = np.random.choice(a= ['democrat', 'republican'],
                             p = [0.45, 0.55],
                             size = 1000)

voter = pd.DataFrame({"race":voter_race, "party": voter_party})

voter_tab = pd.crosstab(voter.race, voter.party, margins = True)
voter_tab.index = ['black', 'white', "col_total"]
voter_tab.columns = ['democrat', 'republican', "row_total"]
voter_tab

Unnamed: 0,democrat,republican,row_total
black,192,233,425
white,264,311,575
col_total,456,544,1000


In [15]:
observed = voter_tab.iloc[0:2, 0:2]
observed

Unnamed: 0,democrat,republican
black,192,233
white,264,311


In [16]:
chi2, p, dof, expected = stats.chi2_contingency(observed = observed)
print("chi squared val:- ", chi2)
print("p value:- ", p)
print("degree of freedom:- ", dof)
print("expected:- \n", expected)

chi squared val:-  0.02787829359769263
p value:-  0.8673952735070445
degree of freedom:-  1
expected:- 
 [[193.8 231.2]
 [262.2 312.8]]


In [9]:
### Data
voter_race = np.random.choice(a= ['asian', 'black', 'hispanic', 'white', 'other'],
                             p = [0.05, 0.15, 0.25, 0.05, 0.5],
                             size = 1000)

voter_party = np.random.choice(a= ['democrat', 'republican'],
                             p = [0.45, 0.55],
                             size = 1000)

voter = pd.DataFrame({"race":voter_race, "party": voter_party})

voter_tab = pd.crosstab(voter.race, voter.party, margins = True)
voter_tab.index = ['asian', 'black', 'hispanic', 'white', 'other', "col_total"]
voter_tab.columns = ['democrat', 'republican', "row_total"]
voter_tab

Unnamed: 0,democrat,republican,row_total
asian,24,26,50
black,52,80,132
hispanic,125,144,269
white,221,294,515
other,12,22,34
col_total,434,566,1000


In [11]:
observed = voter_tab.iloc[0:5, 0:2]
observed

Unnamed: 0,democrat,republican
asian,24,26
black,52,80
hispanic,125,144
white,221,294
other,12,22


In [13]:
chi2, p, dof, expected = stats.chi2_contingency(observed = observed)
print("chi squared val:- ", chi2)
print("p value:- ", p)
print("degree of freedom:- ", dof)
print("expected:- \n", expected)

chi squared val:-  3.2833599237721987
p value:-  0.5115730452042193
degree of freedom:-  4
expected:- 
 [[ 21.7    28.3  ]
 [ 57.288  74.712]
 [116.746 152.254]
 [223.51  291.49 ]
 [ 14.756  19.244]]


### Resources
1) https://www.youtube.com/watch?v=QE0v3HHcKbs <br>
2) https://towardsdatascience.com/running-chi-square-tests-in-python-with-die-roll-data-b9903817c51b <br>
3) https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.chisquare.html <br>