In [1]:
import numpy as np
import pandas as pd
from scipy.stats import chi2, chi2_contingency

# -------------------------------
# Step 1: Define contingency table
# -------------------------------
# Observed frequencies of salmon feed trials: one row per feed type,
# one column per outcome category.
salmon = pd.DataFrame({
    'feed': ['old', 'new'],
    'one': [780, 855],
    'two': [1004, 1083],
    'three': [1450, 1670]
}).set_index('feed')

# Add the margins used by the manual calculation. The column totals are
# appended first so the row-wise sum also produces the grand total in the
# bottom-right corner.
salmon.loc['total'] = salmon.sum(axis=0)
salmon['total'] = salmon.sum(axis=1)

# Print the label and the table separately: passing both to a single print()
# inserts a separator space before the table and shifts its header row.
print("Contingency Table with Totals:")
print(salmon)
print()

# --------------------------------------------
# Step 2: Manual calculation of chi-squared
# --------------------------------------------
previous = np.array(salmon)      # Observed counts including the margins
rows, cols = previous.shape

# Degrees of freedom: (r-1)*(c-1) over the observed categories only, so the
# totals row and totals column are dropped before subtracting one.
df = (rows - 2) * (cols - 2)
N = previous[-1, -1]             # Grand total

# Expected count of each cell under independence:
# (row total * column total) / grand total, for every non-margin cell.
expected = np.outer(previous[:-1, -1], previous[-1, :-1]) / N

# Pearson chi-squared statistic: sum of (observed - expected)^2 / expected.
chi2_stat = float(((previous[:-1, :-1] - expected) ** 2 / expected).sum())

print(f"Manual chi-squared statistic: {chi2_stat:.3f}")

# Critical value at significance level alpha (95% confidence)
alpha = 0.05
cv = chi2.ppf(1 - alpha, df=df)
print(f"Critical value (df={df}, alpha={alpha}): {cv:.3f}")

# Both messages end with a blank line so Step 3's output is separated
# whichever branch is taken.
if chi2_stat > cv:
    print("Result: Reject null hypothesis\n")
else:
    print("Result: Cannot reject null hypothesis\n")

# --------------------------------------------
# Step 3: Using scipy.stats.chi2_contingency
# --------------------------------------------
observed = salmon.iloc[:-1, :-1].to_numpy()  # exclude totals
chi2_stat_scipy, p_value, dof, expected_scipy = chi2_contingency(observed)

# The manual and SciPy routes must agree on the degrees of freedom and on the
# expected counts; a mismatch means the margins were sliced incorrectly.
assert dof == df and np.allclose(expected_scipy, expected)

print(f"SciPy chi-squared statistic: {chi2_stat_scipy:.3f}")
print("Degrees of freedom:", dof)
print(f"P-value: {p_value:.6f}")
print("Expected counts:")
print(expected_scipy)


Contingency Table with Totals:
         one   two  three  total
feed                           
old     780  1004   1450   3234
new     855  1083   1670   3608
total  1635  2087   3120   6842 

Manual chi-squared statistic: 1.504
Critical value (df=2, alpha=0.05): 5.991
Result: Cannot reject null hypothesis

SciPy chi-squared statistic: 1.504
Degrees of freedom: 2
P-value: 0.471336
Expected counts:
 [[ 772.81350482  986.45980707 1474.7266881 ]
 [ 862.18649518 1100.54019293 1645.2733119 ]]


In [2]:
from scipy.stats import chi2
# Upper-tail 5% critical value of the chi-squared distribution with
# 2 degrees of freedom, shown to three decimals.
print('cv = {:0.3f}'.format(chi2.ppf(1 - 0.05, df=2)))
# cv = 5.991

cv = 5.991
