### Chi-Square Test for Independence — Product Preference vs Gender
Automated Chi-square method

In [6]:
# %load test2.py
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency # chi2_contingency does the Chi-Square test for us.

In [8]:
# Step 2: Read the survey responses.
# The path is relative, so product.csv must be in the current working directory.
df = pd.read_csv("product.csv")

# Step 3: Cross-tabulate the responses into observed counts:
# one row per Gender value, one column per Preferred Product value.
contingency_table = pd.crosstab(
    index=df["Gender"],
    columns=df["Preferred Product"],
)

In [10]:
# Step 4: Perform the Chi-Square Test automatically (without Yates' correction)
# Inputs:
#   contingency_table: the observed Gender x Preferred Product counts. Here it is
#       the pandas crosstab built above; any 2D array-like of frequencies works.
#   correction=False: disables Yates' continuity correction, which SciPy applies
#       by default to tables with 1 degree of freedom (2x2) to make the test
#       more conservative.
chi2_stat, p_value, dof, expected = chi2_contingency(contingency_table, correction=False)

# Outputs:
#   chi2_stat: the test statistic (how far the observed counts are from the expected counts).
#   p_value: the probability of getting a statistic at least this large IF gender and
#       product preference were independent. It is not the probability that the
#       observed difference is "just by chance".
#   dof: degrees of freedom, (rows - 1) * (columns - 1).
#   expected: the counts each cell would have if gender didn't affect product preference.

# Validity check: the chi-square approximation is commonly considered reliable only
# when every expected count is at least about 5. With smaller counts the p-value
# can be misleading, so tell the reader instead of reporting it silently.
MIN_EXPECTED_COUNT = 5
if (expected < MIN_EXPECTED_COUNT).any():
    print(
        "Warning: smallest expected count is {:.2f} (below {}). "
        "The chi-square approximation may be unreliable; for a 2x2 table "
        "consider Fisher's exact test (scipy.stats.fisher_exact).".format(
            expected.min(), MIN_EXPECTED_COUNT
        )
    )

In [12]:
# Step 5: Show the observed counts, then the counts expected under independence
print("Observed Frequency Table:")
print(contingency_table)

print("\nExpected Frequencies (Calculated Automatically):")
# Re-attach the row/column labels of the observed table to the expected counts
# so the two printouts line up and can be compared cell by cell.
expected_df = pd.DataFrame(
    expected,
    index=contingency_table.index,
    columns=contingency_table.columns,
)
print(expected_df)

# Step 6: Report the test results (statistic rounded to 2 decimals, p-value to 4)
for label, value in (
    ("\nChi-square Statistic:", round(chi2_stat, 2)),
    ("P-value:", round(p_value, 4)),
    ("Degrees of Freedom:", dof),
):
    print(label, value)

Observed Frequency Table:
Preferred Product  Product A  Product B
Gender                                 
Female                     2          4
Male                       4          2

Expected Frequencies (Calculated Automatically):
Preferred Product  Product A  Product B
Gender                                 
Female                   3.0        3.0
Male                     3.0        3.0

Chi-square Statistic: 1.33
P-value: 0.2482
Degrees of Freedom: 1


In [14]:
# Step 7: Interpretation
# alpha is the significance level: the chance we accept of rejecting the null
# hypothesis (gender and product preference are independent) when it is actually true.
#   p-value <  alpha -> statistically significant association; reject the null hypothesis.
#   p-value >= alpha -> no statistically significant association; fail to reject it.
# Failing to reject is not proof of independence: the data simply do not
# provide enough evidence of an association.
alpha = 0.05

verdict = (
    "\nWe reject the null hypothesis. Gender influences product preference."
    if p_value < alpha
    else "\nWe fail to reject the null hypothesis. Gender does not significantly influence product preference."
)
print(verdict)


We fail to reject the null hypothesis. Gender does not significantly influence product preference.
