# CHI-SQUARE TEST

In [5]:
#import libraries
import pandas as pd
from scipy.stats import chi2_contingency, chi2

# 1. State the Hypotheses:

In [8]:
# State the null and alternative hypotheses for the test of independence.
hypothesis_lines = (
    "STEP 1: Hypotheses",
    "H₀ (Null Hypothesis): There is no association between device type and customer satisfaction (they are independent).",
    "H₁ (Alternative Hypothesis): There is an association between device type and customer satisfaction (they are not independent).",
)
print(*hypothesis_lines, sep="\n")

STEP 1: Hypotheses
H₀ (Null Hypothesis): There is no association between device type and customer satisfaction (they are independent).
H₁ (Alternative Hypothesis): There is an association between device type and customer satisfaction (they are not independent).


# 2. Compute the Chi-Square Statistic:

In [11]:
# Survey counts: one row per satisfaction level, one column per device type.
satisfaction_levels = ['Very Satisfied', 'Satisfied', 'Neutral', 'Unsatisfied', 'Very Unsatisfied']
counts_by_device = {
    'Smart Thermostat': [50, 80, 60, 30, 20],
    'Smart Light': [70, 100, 90, 50, 50],
}
observed = pd.DataFrame(counts_by_device, index=satisfaction_levels)

print("\nSTEP 2: Observed Frequencies (Contingency Table):", observed, sep="\n")

# Run the chi-square test of independence on the observed table; the result
# unpacks into the statistic, its p-value, the degrees of freedom and the
# expected counts.
chi2_stat, p_val, dof, expected = chi2_contingency(observed)

# Give the expected counts the same row/column labels as the observed table.
expected_df = pd.DataFrame(expected, columns=observed.columns, index=observed.index)

summary_lines = [
    "\nSTEP 3: Chi-Square Test Results:",
    f"Chi-Square Statistic: {round(chi2_stat, 4)}",
    f"Degrees of Freedom: {dof}",
    f"P-Value: {round(p_val, 4)}",
]
print("\n".join(summary_lines))

print("\nExpected Frequencies:", expected_df.round(2), sep="\n")


STEP 2: Observed Frequencies (Contingency Table):
                  Smart Thermostat  Smart Light
Very Satisfied                  50           70
Satisfied                       80          100
Neutral                         60           90
Unsatisfied                     30           50
Very Unsatisfied                20           50

STEP 3: Chi-Square Test Results:
Chi-Square Statistic: 5.6382
Degrees of Freedom: 4
P-Value: 0.2278

Expected Frequencies:
                  Smart Thermostat  Smart Light
Very Satisfied                48.0         72.0
Satisfied                     72.0        108.0
Neutral                       60.0         90.0
Unsatisfied                   32.0         48.0
Very Unsatisfied              28.0         42.0


# 3. Determine the Critical Value:

### The critical value is found using the significance level (alpha) of 0.05 and the degrees of freedom, which for a contingency table is (number of rows − 1) × (number of columns − 1) = (5 − 1) × (2 − 1) = 4

In [15]:
# Significance level for the test. The critical value is the (1 - alpha)
# quantile of the chi-square distribution with `dof` degrees of freedom
# (`dof` comes from the chi2_contingency call in the earlier cell).
alpha = 0.05
critical_value = chi2.ppf(1 - alpha, dof)

print("\nSTEP 4: Critical Value and Comparison")
print(f"Significance Level (alpha): {alpha}")
print(f"Degrees of Freedom: {dof}")
# Interpolate alpha so the label cannot drift from the level actually used.
print(f"Critical Value (Chi-Square) at α = {alpha}: {round(critical_value, 4)}")


STEP 4: Critical Value and Comparison
Significance Level (alpha): 0.05
Degrees of Freedom: 4
Critical Value (Chi-Square) at α = 0.05: 9.4877


# 4. Make a Decision:

In [18]:
# Decision rule: reject H0 only when the test statistic exceeds the critical
# value; both values are computed in earlier cells.
print("\nSTEP 5: Decision and Conclusion")
reject_null = chi2_stat > critical_value
result_line = (
    "Result: Reject the null hypothesis."
    if reject_null
    else "Result: Fail to reject the null hypothesis."
)
conclusion_line = (
    "Conclusion: There IS a significant association between device type and customer satisfaction."
    if reject_null
    else "Conclusion: There is NO significant association between device type and customer satisfaction."
)
print(result_line)
print(conclusion_line)


STEP 5: Decision and Conclusion
Result: Fail to reject the null hypothesis.
Conclusion: There is NO significant association between device type and customer satisfaction.
