### Contigency Table

In [6]:
import pandas as pd

# Example data
data = {
    "Gender": ["Male", "Female", "Male", "Female", "Male", "Female", "Male", "Female"],
    "Preference": ["A", "B", "A", "A", "B", "A", "A", "B"]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Generate a contingency table
contingency_table = pd.crosstab(df['Gender'], df['Preference'])

# Display the table
print(contingency_table)


Preference  A  B
Gender          
Female      2  2
Male        3  1


In [8]:
import numpy as np
from scipy.stats import chi2_contingency

# Observed data (contingency table)
# Rows: Gender (Male, Female)
# Columns: Product Preference (A, B)
data = np.array([
    [30, 20],  # Male preferences
    [40, 10]   # Female preferences
])


# Perform the Chi-Square Test of Independence
chi2, p, dof, expected = chi2_contingency(contingency_table)

# Results
print("Chi-Square Statistic:", chi2)
print("p-value:", p)
print("Degrees of Freedom:", dof)
print("Expected Frequencies:")
print(expected)

# Conclusion
if p < 0.05:
    print("Reject the null hypothesis: Product preference is not independent of gender.")
else:
    print("Fail to reject the null hypothesis: Product preference is independent of gender.")


Chi-Square Statistic: 0.0
p-value: 1.0
Degrees of Freedom: 1
Expected Frequencies:
[[2.5 1.5]
 [2.5 1.5]]
Fail to reject the null hypothesis: Product preference is independent of gender.


In [4]:
import numpy as np

# Observed data (contingency table)
observed = np.array([
    [30, 20],  # Male preferences
    [40, 10]   # Female preferences
])

# Step 1: Calculate row totals, column totals, and grand total
row_totals = observed.sum(axis=1)
col_totals = observed.sum(axis=0)
grand_total = observed.sum()

# Step 2: Calculate expected frequencies
expected = np.outer(row_totals, col_totals) / grand_total

# Step 3: Compute the Chi-Square statistic
chi_square_stat = ((observed - expected) ** 2 / expected).sum()

# Step 4: Calculate degrees of freedom
rows, cols = observed.shape
degrees_of_freedom = (rows - 1) * (cols - 1)

# Step 5: Output the results
print("Observed Frequencies:")
print(observed)

print("\nExpected Frequencies:")
print(expected)

print(f"\nChi-Square Statistic: {chi_square_stat:.2f}")
print(f"Degrees of Freedom: {degrees_of_freedom}")

# Interpretation
from scipy.stats import chi2

# Step 6: Find the p-value
p_value = 1 - chi2.cdf(chi_square_stat, degrees_of_freedom)

print(f"p-value: {p_value:.4f}")

if p_value < 0.05:
    print("Reject the null hypothesis: Product preference is not independent of gender.")
else:
    print("Fail to reject the null hypothesis: Product preference is independent of gender.")


Observed Frequencies:
[[30 20]
 [40 10]]

Expected Frequencies:
[[35. 15.]
 [35. 15.]]

Chi-Square Statistic: 4.76
Degrees of Freedom: 1
p-value: 0.0291
Reject the null hypothesis: Product preference is not independent of gender.
