# Name: Arav Kumar

### Batch Start 19-Aug-24

### Course: Data Science

## CHI-SQUARE TEST

#### Association between Device Type and Customer Satisfaction

In [4]:
import numpy as np
import scipy.stats as stats

In [5]:
# Contingency table of observed counts (5 rows x 2 columns)
observed = np.array(
    [
        (50, 70),
        (80, 100),
        (60, 90),
        (30, 50),
        (20, 50),
    ]
)

In [6]:
# Marginal totals of the contingency table
grand_total = np.sum(observed)  # sum over every cell
column_totals = np.sum(observed, axis=0)  # collapse rows -> one total per column
row_totals = np.sum(observed, axis=1)  # collapse columns -> one total per row

In [7]:
# Display the row totals of the contingency table (one value per row of `observed`)
row_totals

array([120, 180, 150,  80,  70])

In [8]:
# Display the column totals of the contingency table (one value per column of `observed`)
column_totals

array([240, 360])

In [9]:
# Display the grand total (sum of every cell of `observed`)
grand_total

600

In [10]:
# Expected counts under independence:
# (row total * column total) / grand total, built via broadcasting
expected = row_totals[:, np.newaxis] * column_totals[np.newaxis, :] / grand_total
expected

array([[ 48.,  72.],
       [ 72., 108.],
       [ 60.,  90.],
       [ 32.,  48.],
       [ 28.,  42.]])

In [11]:
# Pearson chi-square statistic: sum over all cells of (O - E)^2 / E
residuals = observed - expected
chi_square_stat = np.sum(residuals ** 2 / expected)

In [12]:
# Degrees of freedom for a contingency table: (rows - 1) * (columns - 1)
n_rows, n_cols = observed.shape
df = (n_rows - 1) * (n_cols - 1)
df

4

In [13]:
# Significance level and the matching upper-tail chi-square cutoff:
# the value below which a chi2(df) variable falls with probability 1 - alpha
alpha = 0.05
confidence = 1 - alpha
critical_value = stats.chi2.ppf(confidence, df)

In [14]:
# p-value: upper-tail probability P(X > chi_square_stat) for X ~ chi2(df).
# The survival function is used instead of `1 - cdf` because `1 - cdf`
# loses precision to cancellation when the statistic is large (cdf close to 1).
p_value = stats.chi2.sf(chi_square_stat, df)

In [15]:
# Report the statistic, the critical value and the p-value
for label, value in (
    ("Chi-Square Statistic:", chi_square_stat),
    ("Critical Value at alpha=0.05:", critical_value),
    ("p-value:", p_value),
):
    print(label, value)

Chi-Square Statistic: 5.638227513227513
Critical Value at alpha=0.05: 9.487729036781154
p-value: 0.22784371130697179


In [16]:
# Reject the null hypothesis only when the statistic exceeds the critical value
reject_null = chi_square_stat > critical_value
message = (
    "Reject the null hypothesis: There is a significant association between device type and customer satisfaction."
    if reject_null
    else "Fail to reject the null hypothesis: No significant association between device type and customer satisfaction."
)
print(message)

Fail to reject the null hypothesis: No significant association between device type and customer satisfaction.


## HYPOTHESIS TESTING

In [18]:
import math
from scipy.stats import norm

In [19]:
# Problem inputs

# Cost model: weekly cost = base_cost + theoretical_cost_per_unit * units
base_cost = 1000
theoretical_cost_per_unit = 5

# Weekly production
average_units = 600  # mean units produced per week
std_dev_units = 25

# Sample information
sample_size = 25  # number of restaurants in the sample
sample_mean = 3050  # sample mean weekly cost in Rs.

# Significance level of the test
alpha = 0.05

In [20]:
# Step 1: Mean weekly cost implied by the model at the average production level
variable_cost = theoretical_cost_per_unit * average_units
theoretical_mean = base_cost + variable_cost

In [21]:
# Step 2: Standard deviation of the weekly cost — the per-unit cost scales the
# standard deviation of the units (the constant base cost adds no spread)
weekly_cost_std_dev = std_dev_units * theoretical_cost_per_unit

In [22]:
# Step 3: Standardize the gap between the sample mean and the model mean
root_n = math.sqrt(sample_size)
standard_error = weekly_cost_std_dev / root_n
mean_gap = sample_mean - theoretical_mean
test_statistic = mean_gap / standard_error

In [23]:
# Step 4: One-tailed critical value from the standard normal distribution
# (note: this rebinds `critical_value`, which the chi-square section also defines)
upper_quantile = 1 - alpha
critical_value = norm.ppf(upper_quantile)

In [24]:
# Step 5: Decision — reject only when the statistic falls below the negated
# critical value, i.e. a lower-tail rejection region.
# NOTE(review): confirm the alternative hypothesis is "mean cost is lower than
# the model"; an upper-tailed claim would need `test_statistic > critical_value`.
if test_statistic < -critical_value:
    decision = "Reject the null hypothesis"
else:
    decision = "Fail to reject the null hypothesis"

In [25]:
# Output the results.
# The statistic is labelled "Z" (not "t") because it is compared against a
# standard-normal critical value obtained from `norm.ppf`.
print(f"Theoretical Mean Weekly Cost: {theoretical_mean} Rs.")
print(f"Sample Mean Weekly Cost: {sample_mean} Rs.")
print(f"Standard Deviation of Weekly Cost: {weekly_cost_std_dev} Rs.")
print(f"Standard Error: {standard_error}")
print(f"Test Statistic (Z): {test_statistic}")
print(f"Critical Value (Z) at alpha 0.05: {critical_value}")
print(f"Decision: {decision}")

Theoretical Mean Weekly Cost: 4000 Rs.
Sample Mean Weekly Cost: 3050 Rs.
Standard Deviation of Weekly Cost: 125 Rs.
Standard Error: 25.0
Test Statistic (t): -38.0
Critical Value (Z) at alpha 0.05: 1.6448536269514722
Decision: Reject the null hypothesis
