In [29]:
# Import the required Python packages
import pandas as pd
from scipy.stats import chi2

In [12]:
# Observed count for each card suit, stored as a one-row frame whose single
# row is labelled "count".
suit_counts = {"Hearts": 441, "Spades": 404, "Diamonds": 402, "Clubs": 353}
df = pd.DataFrame([suit_counts], index=["count"])
df

Unnamed: 0,Hearts,Spades,Diamonds,Clubs
count,441,404,402,353


# Hypothesis

$H_0: P_H = P_S = P_D = P_C$

$H_1$: at least one of $P_H, P_S, P_D, P_C$ differs from the others (the proportions are not all equal)

In [19]:
# Build the chi-square goodness-of-fit worksheet from the observed counts in
# `df` instead of retyping every number by hand, so the worksheet can never
# drift out of sync with the data.
observed = df.loc["count"]

# Under H0 every suit is equally likely, so each suit is expected to receive
# an equal share of the total number of observations (1600 / 4 = 400 here).
expected = observed.sum() / len(observed)
deviation = observed - expected

# One column per worksheet step, then transpose so the steps become rows and
# the suits stay as columns. The transpose also upcasts the integer counts so
# that the whole table is float, matching the per-suit contributions.
table = pd.DataFrame({
    "observed": observed,
    "expected": expected,
    "observed - expected": deviation,
    "O - E squared": deviation ** 2,
    "(O - E) squared / E": deviation ** 2 / expected,
}).T
table

Unnamed: 0,Hearts,Spades,Diamonds,Clubs
observed,441.0,404.0,402.0,353.0
expected,400.0,400.0,400.0,400.0
observed - expected,41.0,4.0,2.0,-47.0
O - E squared,1681.0,16.0,4.0,2209.0
(O - E) squared / E,4.2025,0.04,0.01,5.5225


In [20]:
# Chi-square test statistic: add up the per-suit (O - E)^2 / E contributions
# held in the last row of `table`.
contributions = table.loc["(O - E) squared / E"]
sum_value = contributions.sum()
sum_value

np.float64(9.774999999999999)

In [31]:
# Critical value of the chi-square distribution for this test:
#   - significance level alpha = 0.05
#   - degrees of freedom = number of categories - 1 = 4 - 1 = 3
# A printed chi-square table lists 7.81 for these settings; chi2.ppf gives the
# same quantile (the point below which 1 - alpha of the distribution lies).
alpha = 0.05
degrees_of_freedom = 4 - 1
critical_value = chi2.ppf(1 - alpha, degrees_of_freedom)
critical_value

np.float64(7.8147279032511765)

# Reject $H_0$

$\chi^2_{(3)} = 9.77, p < 0.05$

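Since $\chi^2 = 9.77$ exceeds the critical value $7.81$ ($\alpha = 0.05$, $df = 3$), we reject $H_0$: the observed counts are not consistent with all four suits being equally likely.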

In [33]:
# Decision rule: H0 is rejected when the chi-square statistic lies beyond the
# critical value, i.e. when this expression evaluates to True.
critical_value < sum_value

np.True_