**Chi-square test**

In [2]:
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency, chi2

# --- Survey data ---
# Contingency table of customer counts: one row per satisfaction level,
# one column per smart home device type.
data = {
    'Satisfaction': ['Very Satisfied', 'Satisfied', 'Neutral', 'Unsatisfied', 'Very Unsatisfied'],
    'Smart Thermostat': [50, 80, 60, 30, 20],
    'Smart Light': [70, 100, 90, 50, 50],
}
df = pd.DataFrame(data).set_index('Satisfaction')

# Visual divider printed after every section of the report.
section_divider = "\n" + "=" * 40 + "\n"

print("--- Provided Contingency Table ---")
print(df)
print(section_divider)

# chi2_contingency expects a plain 2-D array of observed counts.
observed_data = df.to_numpy()

# ---------------------------

# Task 1: State the Hypotheses
print(
    "--- Task 1: State the Hypotheses ---",
    "Null Hypothesis (H0): There is no significant association (independence) between the type of smart home device purchased and the customer satisfaction level. The variables are independent.",
    "Alternative Hypothesis (H1): There is a significant association (dependence) between the type of smart home device purchased and the customer satisfaction level. The variables are dependent.",
    sep="\n",
)
print(section_divider)

# Task 2: Compute the Chi-Square Statistic
print("--- Task 2: Compute the Chi-Square Statistic ---")
# The test yields, in order: the chi-square statistic, its p-value, the
# degrees of freedom, and the table of frequencies expected under independence.
chi2_stat, p_value, dof, expected_frequencies = chi2_contingency(observed_data)

print("Observed Frequencies:", observed_data, sep="\n")
print("\nExpected Frequencies:", expected_frequencies.round(2), sep="\n")
print(f"\nChi-Square Statistic (χ²): {chi2_stat:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Degrees of Freedom (df): {dof}")
print(section_divider)

# Task 3: Determine the Critical Value
print("--- Task 3: Determine the Critical Value ---")
alpha = 0.05  # Significance level

# Upper-tail cutoff: the (1 - alpha) quantile of the chi-square distribution
# with the degrees of freedom reported by the test.
critical_value = chi2.ppf(1 - alpha, dof)

print(f"Significance Level (α): {alpha}")
print(f"Degrees of Freedom (df): {dof}")
print(f"Critical Value: {critical_value:.4f}")
print(section_divider)

# Task 4: Make a Decision
print("--- Task 4: Make a Decision ---")
print(f"Chi-Square Statistic: {chi2_stat:.4f}")
print(f"Critical Value: {critical_value:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Significance Level (α): {alpha}")

# H0 is rejected only when both criteria agree: the statistic exceeds the
# critical value and the p-value falls below alpha.
reject_h0 = chi2_stat > critical_value and p_value < alpha
if reject_h0:
    decision_text = "Reject the Null Hypothesis (H0)."
    conclusion_text = "There is a statistically significant association between the type of smart home device purchased and the customer satisfaction level   "
else:
    decision_text = "Fail to Reject the Null Hypothesis (H0)."
    conclusion_text = "There is no statistically significant association between the type of smart home device purchased and the customer satisfaction level"

print(f"\nDecision: {decision_text}")
print(f"Conclusion: {conclusion_text}")

print(section_divider)


--- Provided Contingency Table ---
                  Smart Thermostat  Smart Light
Satisfaction                                   
Very Satisfied                  50           70
Satisfied                       80          100
Neutral                         60           90
Unsatisfied                     30           50
Very Unsatisfied                20           50


--- Task 1: State the Hypotheses ---
Null Hypothesis (H0): There is no significant association (independence) between the type of smart home device purchased and the customer satisfaction level. The variables are independent.
Alternative Hypothesis (H1): There is a significant association (dependence) between the type of smart home device purchased and the customer satisfaction level. The variables are dependent.


--- Task 2: Compute the Chi-Square Statistic ---
Observed Frequencies:
[[ 50  70]
 [ 80 100]
 [ 60  90]
 [ 30  50]
 [ 20  50]]

Expected Frequencies:
[[ 48.  72.]
 [ 72. 108.]
 [ 60.  90.]
 [ 32.  48.]
 [ 28.  42.]]

**Hypothesis Testing**

In [3]:
import numpy as np
from scipy.stats import norm

# --- Data Provided ---
sample_mean_weekly_cost = 3050  # x_bar
sample_size = 25                 # n
units_produced_mean_X = 600      # Mean of X for theoretical model
units_produced_std_dev_X = 25    # Standard deviation of X

# Weekly operating cost model: W = fixed_weekly_cost + cost_per_unit * X
# (i.e. W = $1,000 + $5X). Keeping both coefficients as named values means the
# hypothesised mean, the standard deviation and every printed message are
# derived from one place and cannot drift apart.
fixed_weekly_cost = 1000
cost_per_unit = 5

# Model-implied mean of W: E[W] = fixed_weekly_cost + cost_per_unit * E[X].
# Computed up front because the hypotheses below quote this value.
theoretical_mean_weekly_cost = fixed_weekly_cost + cost_per_unit * units_produced_mean_X

# Model-implied standard deviation of W: sigma_W = |cost_per_unit| * sigma_X
# (the additive constant does not affect the spread).
std_dev_weekly_cost = abs(cost_per_unit) * units_produced_std_dev_X

# The standard error divides by sqrt(n) and the Z statistic divides by the
# standard error, so both inputs must be strictly positive.
assert sample_size > 0, "sample_size must be a positive number of observations"
assert std_dev_weekly_cost > 0, "the model must imply a positive standard deviation"

section_divider = "\n" + "=" * 50 + "\n"

# ---------------------------------

# Task 1: State the Hypotheses
# Right-tailed test: the claim under investigation is that costs are HIGHER
# than the model-implied mean.
null_hypothesis_statement = (
    "Null Hypothesis (H0): The mean weekly operating cost is not higher than "
    f"the model suggests (μ <= ${theoretical_mean_weekly_cost:.0f})."
)
alternative_hypothesis_statement = (
    "Alternative Hypothesis (H1): The mean weekly operating cost is higher than "
    f"the model suggests (μ > ${theoretical_mean_weekly_cost:.0f})."
)

print("--- Task 1: State the Hypotheses ---")
print(null_hypothesis_statement)
print(alternative_hypothesis_statement)
print(section_divider)

# Task 2: Calculate the Test Statistic
print("--- Task 2: Calculate the Test Statistic ---")

print(
    "Theoretical Mean Weekly Cost (μ) based on model "
    f"(W = ${fixed_weekly_cost:,} + ${cost_per_unit}X, X={units_produced_mean_X}): "
    f"${theoretical_mean_weekly_cost:.2f}"
)
print(f"Standard Deviation of Weekly Cost (σ): ${std_dev_weekly_cost:.2f}")

# Standard error of the sample mean: σ / sqrt(n)
standard_error = std_dev_weekly_cost / np.sqrt(sample_size)
print(f"Standard Error of the Mean: ${standard_error:.2f}")

# Z = (x_bar - μ) / (σ / sqrt(n))
# With the data above x_bar is below μ, so Z is negative and a right-tailed
# test cannot reject H0.
test_statistic_z = (sample_mean_weekly_cost - theoretical_mean_weekly_cost) / standard_error

print(f"Sample Mean Weekly Cost (x̄): ${sample_mean_weekly_cost:.2f}")
print(f"Test Statistic (Z): {test_statistic_z:.4f}")
print(section_divider)

# Task 3: Determine the Critical Value
print("--- Task 3: Determine the Critical Value ---")
alpha = 0.05  # Significance level

# For a one-tailed (right-tailed) test, the cutoff is the (1 - alpha) quantile
# of the standard normal distribution.
critical_value_z = norm.ppf(1 - alpha)

print(f"Significance Level (α): {alpha}")
print(f"Critical Value (Z-critical) for a one-tailed test: {critical_value_z:.4f}")
print(section_divider)

# Task 4: Make a Decision
print("--- Task 4: Make a Decision ---")
print(f"Test Statistic (Z): {test_statistic_z:.4f}")
print(f"Critical Value (Z-critical): {critical_value_z:.4f}")

# Reject H0 only when Z lies in the upper rejection region.
if test_statistic_z > critical_value_z:
    decision = "Reject the Null Hypothesis (H0)."
    conclusion_statement = "There is strong evidence to support the restaurant owners' claim that the weekly operating costs are higher than the model suggests."
else:
    decision = "Fail to Reject the Null Hypothesis (H0)."
    conclusion_statement = "There is no strong evidence to support the restaurant owners' claim that the weekly operating costs are higher than the model suggests."

print(f"\nDecision: {decision}")
print(section_divider)

# Task 5: Conclusion
print("--- Task 5: Conclusion ---")
print(conclusion_statement)
print(section_divider)


--- Task 1: State the Hypotheses ---
Null Hypothesis (H0): The mean weekly operating cost is not higher than the model suggests (μ <= $4000).
Alternative Hypothesis (H1): The mean weekly operating cost is higher than the model suggests (μ > $4000).


--- Task 2: Calculate the Test Statistic ---
Theoretical Mean Weekly Cost (μ) based on model (W = $1,000 + $5X, X=600): $4000.00
Standard Deviation of Weekly Cost (σ): $125.00
Standard Error of the Mean: $25.00
Sample Mean Weekly Cost (x̄): $3050.00
Test Statistic (Z): -38.0000


--- Task 3: Determine the Critical Value ---
Significance Level (α): 0.05
Critical Value (Z-critical) for a one-tailed test: 1.6449


--- Task 4: Make a Decision ---
Test Statistic (Z): -38.0000
Critical Value (Z-critical): 1.6449

Decision: Fail to Reject the Null Hypothesis (H0).


--- Task 5: Conclusion ---
There is no strong evidence to support the restaurant owners' claim that the weekly operating costs are higher than the model suggests.


