In [1]:
import math
from scipy import stats
import numpy as np

In [2]:
# 1)The car company wants to determine whether a particular fuel additive improves petrol mileage. The company's vehicles
# typically achieve an average fuel economy of 23 mpg with a standard deviation of 2.4 mpg on a journey from location 'A' to 'E'
# without the additive. As part of a research study, the company sends forty vehicles on the same journey with the additive,
# resulting in an average fuel economy of 23.7 mpg. Can we conclude that the additive is successful based on
# this result? Use a significance level of 5%.

#One-sample z-test for mean
#H0: Additive does not make any difference  (mean=23)
#H1: Additive is efficient (mean>23)

# One-sample, right-tailed z-test: the population standard deviation is given,
# so the sample mean is compared with the population mean on the normal scale.

# Population parameters (fuel economy without the additive, in mpg)
population_mean = 23
population_std = 2.4

# Sample data (fuel economy with the additive)
sample_size = 40
sample_mean = 23.7

# Standard error of the sample mean: sigma / sqrt(n)
standard_error = population_std / math.sqrt(sample_size)

# z-score: how many standard errors the sample mean lies above the population mean
z_score = (sample_mean - population_mean) / standard_error

# Upper-tail p-value, because H1 is one-sided (mean > 23).
# The survival function is used instead of `1 - cdf(z)`: both give the same
# quantity, but `sf` does not lose precision to cancellation when z is large.
p_value = stats.norm.sf(z_score)

# Significance level
alpha = 0.05

# Report the p-value and the decision at the chosen significance level
print("pvalue:  " , p_value)
if p_value < alpha:
    print("Reject null hypothesis. There is enough evidence to conclude that the additive is successful.")
else:
    print("Failed to reject null hypothesis. There is not much sufficient evidence to conclude that the additive is successful.")


pvalue:   0.032543363246383517
Reject null hypothesis. There is enough evidence to conclude that the additive is successful.


In [4]:
# 2) Is the average weight of a specific car model significantly different from the industry standard weight of 3000 pounds?
# Assume the sample weights of 5 car models is 2950, 3050, 3100, 2980, and 3010 pounds and the significance level is 0.05.

#One-sample t-test for mean
#H0: The average weight of the specific car model is equal to 3000 pounds (μ = 3000).
#H1: The average weight of the specific car model is significantly different from 3000 pounds(μ ≠ 3000).

# One-sample t-test: compares the mean of the sampled weights against the
# industry-standard weight `mu`.

# Sample weights (in pounds) of the specific car model
weights = np.array([2950, 3050, 3100, 2980, 3010])
mu = 3000                       # Hypothesised population mean under H0

# Perform the one-sample t-test (scipy's default alternative is two-sided,
# which matches H1: mu != 3000)
t_statistic, p_value = stats.ttest_1samp(weights, mu)

alpha = 0.05                    # Significance level

# Compare p-value with significance level.
# Failing to reject H0 does NOT show that the mean equals 3000 pounds; it only
# means the sample gives no significant evidence of a difference, so the
# message is worded accordingly.
if p_value < alpha:
    decision = "Reject null hypothesis. Therefore the average weight of the specific car model is significantly different from 3000 pounds "
else:
    decision = "Failed to reject null hypothesis. There is not enough evidence that the average weight of the specific car model is different from 3000 pounds"

print("t-statistic:", t_statistic)
print("p-value:", p_value)
print("Decision:", decision)


t-statistic: 0.683270656201011
p-value: 0.5319601703570653
Decision: Failed to reject null hypothesis. Therefore the average weight of the specific car model is equal to 3000 pounds


In [5]:
# 3) Is there a discernible difference between ratings of the restaurant services offered before and after its expansion?
# The ratings given by a sample of 15 customers are as follows. Use a significance level of 5%.
# ratings before expansion = [10,8,7,8,8,8,7,10,8,9,7,9,9,8,9]
# ratings after expansion = [9,8,6,8,7,9,7,7,7,9,10,9,6,8,10]

#Two-sample t-test for mean
#H0: There is no significant difference in the ratings before and after expansion.
#H1: There is a significant difference in the ratings before and after expansion.

# Customer ratings of the restaurant, one list per period
ratings_before = [10,8,7,8,8,8,7,10,8,9,7,9,9,8,9]
ratings_after = [9,8,6,8,7,9,7,7,7,9,10,9,6,8,10]

# Significance level used for the decision below
alpha = 0.05

# Independent two-sample t-test on the two rating lists.
# NOTE(review): if the same 15 customers gave both the "before" and the "after"
# rating, a paired test (stats.ttest_rel) may be the better fit; confirm the
# study design before relying on this result.
t_statistic, p_value = stats.ttest_ind(ratings_before, ratings_after)

# Pick the verdict by comparing the p-value with the significance level
decision = (
    "Reject null hypothesis. There is a significant difference in the restraurant ratings before and after expansion."
    if p_value < alpha
    else "Failed to reject null hypothesis. There is no significant difference in the restraurant ratings before and after expansion."
)

print("t-statistic:", t_statistic)
print("p-value:", p_value)
print("Decision:", decision)


t-statistic: 0.7905694150420962
p-value: 0.4358405212058871
Decision: Failed to reject null hypothesis. There is no significant difference in the restraurant ratings before and after expansion.


In [6]:
# 4)13 men and 10 women took part in a fitness program. Is there a significant difference in the average body fat
# percentage between men and women based on the following data? Assume alpha is 5%.
# Men's body fat percentages: [20.5, 22.1, 18.9, 21.3, 19.8, 23.7, 20.1, 19.2, 22.5, 21.8, 20.9, 19.5, 21.7]
# Women's body fat percentages: [25.6, 26.8, 24.9, 23.5, 27.2, 26.3, 25.1, 24.8, 26.4, 25.7]

#Two-sample independent t-test
#The two-sample independent t-test (in its pooled form) assumes homogeneity of variances, i.e. both groups have equal variance
#The Levene's test is a statistical test used to assess the equality of variances between multiple groups or samples

#H0: There is no significant difference in the average body fat percentage between men and women.
#H1: There is a significant difference in the average body fat percentage between men and women.

# Body fat percentages recorded for the two groups in the fitness program
men_body_fat = [20.5, 22.1, 18.9, 21.3, 19.8, 23.7, 20.1, 19.2, 22.5, 21.8, 20.9, 19.5, 21.7]
women_body_fat = [25.6, 26.8, 24.9, 23.5, 27.2, 26.3, 25.1, 24.8, 26.4, 25.7]

# Significance level (also read by the t-test cell that follows)
alpha = 0.05

# Levene's test for equality of variances between the two groups
statistic, p_value = stats.levene(men_body_fat, women_body_fat)

# A p-value below alpha means the equal-variance hypothesis is rejected
variances_differ = p_value < alpha
print(
    "The variances of the two groups are significantly different."
    if variances_differ
    else "The variances of the two groups are not significantly different."
)

The variances of the two groups are not significantly different.


In [8]:
# Levene's test in the previous cell found no significant difference between the
# variances of the two groups, so the independent t-test is run with scipy's
# default equal-variance (pooled) setting.

# Two-sample independent t-test on the men's and women's body fat percentages
t_static, p_value = stats.ttest_ind(men_body_fat, women_body_fat)

# Reject H0 only when the p-value falls below the significance level
decision = (
    "Reject null hypothesis. There is a significant difference in the average body fat percentage between men and women."
    if p_value < alpha
    else "Failed to reject null hypothesis. There is no significant difference in the average body fat percentage between men and women."
)

print("t-statistic:", t_static)
print("p-value:", p_value)
print("Decision:", decision)

t-statistic: -8.640416865721528
p-value: 2.344768305204797e-08
Decision: Reject null hypothesis. There is a significant difference in the average body fat percentage between men and women.


In [9]:
# 5)A researcher wants to investigate whether there is a relationship between smoking habits (smoker vs. non-smoker) and the
# incidence of lung cancer (yes vs. no). The researcher collects data from a sample of 500 individuals and obtains the following
# table.(alpha=0.05)
#              | Lung Cancer: Yes | Lung Cancer: No |
# -------------|----------------- |-----------------|
# Smoker       |       80         |      120        |
# Non-Smoker   |       50         |      250        |

#Chi-square test
#H0: Smoking habits and the incidence of lung cancer are independent.
#H1: Smoking habits and the incidence of lung cancer are associated.

# Significance level for the independence test
alpha = 0.05

# Observed 2x2 contingency table:
# rows = smoker / non-smoker, columns = lung cancer yes / no
observed = np.array([[80, 120], [50, 250]])

# Chi-square test of independence on the observed table
chi2, p_value, dof, expected = stats.chi2_contingency(observed)

print("Chi-square statistic:", chi2)
print("P-value:", p_value)
print("Degrees of freedom:", dof)
print("Expected frequencies:", expected)

# Choose the two-line verdict by comparing the p-value with alpha
verdict_lines = (
    (
        "There is enough evidence to reject the null hypothesis.",
        "Smoking habits and the incidence of lung cancer are associated.",
    )
    if p_value < alpha
    else (
        "There is not enough evidence to reject the null hypothesis.",
        "Smoking habits and the incidence of lung cancer are independent.",
    )
)
print(*verdict_lines, sep="\n")


Chi-square statistic: 32.755110880110884
P-value: 1.0453109856796727e-08
Degrees of freedom: 1
Expected frequencies: [[ 52. 148.]
 [ 78. 222.]]
There is enough evidence to reject the null hypothesis.
Smoking habits and the incidence of lung cancer are associated.
