## Hypothesis testing

In [2]:
import numpy as np
from scipy import stats
import warnings
warnings.filterwarnings("ignore")

In [4]:
sample_mean = 3050  # Rs. 3,050
theoretical_mean = 1000 + 5*600  # theoretical mean weekly cost according to the cost model
standard_deviation = 5*25  # 5*25 units
sample_size = 25  # number of restaurants

In [6]:
# Calculate the test statistic (Z-score)
z_score = (sample_mean - theoretical_mean) / (standard_deviation / np.sqrt(sample_size))
z_score

np.float64(-38.0)

In [8]:
# Determine the critical value
alpha = 0.05  # alpha level of 5%
critical_value = stats.norm.ppf(1 - alpha/2)

In [10]:
# Make a decision
if np.abs(z_score) > critical_value:
    decision = "Reject the null hypothesis"
else:
    decision = "Fail to reject the null hypothesis"

In [12]:
# Conclusion
conclusion = f"Based on the decision, there {decision} strong evidence to support the restaurant owner's claim that the weekly operating costs are higher than the model suggests."

In [14]:
# Print the results
print("Sample Mean:", sample_mean)
print("Theoretical Mean:", theoretical_mean)
print("Standard Deviation:", standard_deviation)
print("Sample Size:", sample_size)
print("Z-score:", z_score)
print("Critical Value:", critical_value)
print(conclusion)

Sample Mean: 3050
Theoretical Mean: 4000
Standard Deviation: 125
Sample Size: 25
Z-score: -38.0
Critical Value: 1.959963984540054
Based on the decision, there Reject the null hypothesis strong evidence to support the restaurant owner's claim that the weekly operating costs are higher than the model suggests.


# Chi Square test

In [17]:
import pandas as pd
import numpy as np
import scipy.stats as stats

In [27]:
data = {'Satisfaction': ['Very Satisfied', 'Satisfied', 'Neutral', 'Unsatisfied', 'Very Unsatisfied', 'Total'],'Smart Thermostat': [50, 80, 60, 30, 20, 240],'Smart Light': [70, 100, 90, 50, 50, 360],'Total': [120, 180, 150, 80, 70, 600] }

df = pd.DataFrame(data)

# Set 'Satisfaction' column as index
df.set_index('Satisfaction', inplace=True)

print(df)

                  Smart Thermostat  Smart Light  Total
Satisfaction                                          
Very Satisfied                  50           70    120
Satisfied                       80          100    180
Neutral                         60           90    150
Unsatisfied                     30           50     80
Very Unsatisfied                20           50     70
Total                          240          360    600


**Hypotheses:**<br>
**Null Hypothesis ($H_0$):** There is no significant association between the type of smart home device purchased (Smart Thermostats vs. Smart Lights) and the customer satisfaction level.\
**Alternative Hypothesis ($H_1$):** There is a significant association between the type of smart home device purchased and the customer satisfaction level.

In [30]:
# Remove the 'Total' row and column from the DataFrame as they are not needed for chi-square test
df_chi = df.drop(index='Total', columns='Total')

# Calculate row and column totals
row_totals = df_chi.sum(axis=1)
col_totals = df_chi.sum(axis=0)

# Calculate the total count of observations
total_count = row_totals.sum()

# Initialize a DataFrame to store expected frequencies
expected_df = pd.DataFrame(index=df_chi.index, columns=df_chi.columns)

# Compute expected frequencies for each cell
for i in range(len(df_chi.index)):
    for j in range(len(df_chi.columns)):
        expected_df.iloc[i, j] = (row_totals[i] * col_totals[j]) / total_count

# Convert expected frequencies to numeric values
expected_df = expected_df.astype(float)

print("Expected Frequencies:")
print(expected_df)

Expected Frequencies:
                  Smart Thermostat  Smart Light
Satisfaction                                   
Very Satisfied                48.0         72.0
Satisfied                     72.0        108.0
Neutral                       60.0         90.0
Unsatisfied                   32.0         48.0
Very Unsatisfied              28.0         42.0


In [32]:
# Convert DataFrame to numpy array for easier computation
observed = df_chi.values.astype(float)
expected = expected_df.values.astype(float)

# Compute chi-square statistic
chi_squared = np.sum((observed - expected)**2 / expected)
print("Chi-Square Statistic:", chi_squared)

Chi-Square Statistic: 5.638227513227513


In [34]:
# Significance level (alpha)
alpha = 0.05

# Degrees of freedom (number of categories minus 1)
df = (len(df_chi.index) - 1) * (len(df_chi.columns) - 1)

# Determine critical value from chi-square distribution
critical_value = stats.chi2.ppf(1 - alpha, df)

print("Critical Value (df={}, alpha={}): {:.3f}".format(df, alpha, critical_value))

# Compare chi-square statistic with critical value
if chi_squared < critical_value:
    print("Decision: Fail to reject the null hypothesis.")
    print("There is not enough evidence to conclude that there is a significant association between the type of smart home device purchased (Smart Thermostats vs. Smart Lights) and the customer satisfaction level at a significance level of 0.05.")
else:
    print("Decision: Reject the null hypothesis.")
    print("There is significant evidence to conclude that there is an association between the type of smart home device purchased (Smart Thermostats vs. Smart Lights) and the customer satisfaction level at a significance level of 0.05.")

Critical Value (df=4, alpha=0.05): 9.488
Decision: Fail to reject the null hypothesis.
There is not enough evidence to conclude that there is a significant association between the type of smart home device purchased (Smart Thermostats vs. Smart Lights) and the customer satisfaction level at a significance level of 0.05.
