In [23]:
import pandas as pd
from scipy.stats import norm
import matplotlib.pyplot as plt
import seaborn as sns

from typing import Union

In [24]:
def ztest(
    samplemean: float,
    samplesize: int,
    populationmean: float,
    populationstd: float,
    alternative: str = "two_sided",
) -> tuple:
    std_err = populationstd / (samplesize**0.5)
    z_score = (samplemean - populationmean) / std_err

    # calculating p_value
    if alternative == "greater":
        p_value = 1 - norm.cdf(z_score)
    elif alternative == "less":
        p_value = norm.cdf(z_score)
    elif alternative == "two_sided":
        p_value = 2 * (1 - norm.cdf(abs(z_score)))

    return p_value, z_score


def zcritical(
    alpha: float, alternative: str = "two_sided"
) -> Union[float, tuple]:  # can also be written as float | tuple, with bitwise operator

    if alternative == "two_sided":
        critical_value_left = norm.ppf(alpha / 2)
        critical_value_right = norm.ppf(1 - alpha / 2)
        return critical_value_left, critical_value_right

    elif alternative == "less":
        critical_value = norm.ppf(alpha)

    elif alternative == "greater":
        critical_value = norm.ppf(1 - alpha)

        return critical_value

#### A country has a population average height of 65 inches with a standard deviation of 2.5. A person feels people from his state are shorter. He takes the average of 20 people and sees that it is 64.5. At a 5% significance level (or 95% confidence level), can we conclude that people from his state are shorter, using the Z-test? What is the p-value?

In [25]:
H0 = "Average population is 65"
H1 = "Average population is less than 65"

test_statistic = ztest
alternative = "less"
alpha = 0.05

samplemean = 64.5
samplesize = 20
populationmean = 65
populationstd = 2.5
alternative = "less"
alpha = 0.05

p_value, zscore = test_statistic(64.5, 20, 65, 2.5, alternative)

if p_value < alpha:
    print(f"Reject Null hypothesis: {p_value, zscore}->", H1)
else:
    print(f"Failed to reject: {p_value}->", H0)

Failed to reject: 0.18554668476134878-> Average population is 65


#### A French cafe has historically maintained that their average daily pastry production is at most 500. With the installation of a new machine, they assert that the average daily pastry production has increased. The average number of pastries produced per day over a 70-day period was found to be 530.Assume that the population standard deviation for the pastries produced per day is 125. Perform a z-test with the critical z-value = 1.64 at the alpha (significance level) = 0.05 to evaluate if ther

In [26]:
H0 = "Average pastry production is 500"
H1 = "Average pastry production is more than 500"

test_statistic = ztest

samplemean = 530
samplesize = 70
populationmean = 500
populationstd = 125
alternative = "greater"
alpha = 0.05

p_value, zscore = test_statistic(
    samplemean, samplesize, populationmean, populationstd, alternative
)

if p_value < alpha:
    print(f"Reject Null hypothesis: {p_value, zscore}->", H1)
else:
    print(f"Failed to reject: {p_value, zscore}->", H0)

Reject Null hypothesis: (0.022322492581293485, 2.007984063681781)-> Average pastry production is more than 500


#### The Chai Point stall at Bengaluru airport estimates that each person visiting the store drinks an average of 1.7 small cups of tea. Assume a population standard deviation of 0.5 small cups. A sample of 30 customers collected over a few days averaged 1.85 small cups of tea per person. Test the claim using an appropriate test at an alpha = 0.05 significance value, with a critical z-score value of ±1.96. Note: Round off the z-score to two decimal places

In [27]:
H0 = "Average drinks 1.7"
H1 = "Average drinks more than 1.7"

test_statistic = ztest

samplemean = 1.85
samplesize = 30
populationmean = 1.7
populationstd = 0.5
alternative = "greater"
alpha = 0.05

p_value, zscore = test_statistic(
    samplemean, samplesize, populationmean, populationstd, alternative
)

if p_value < alpha:
    print(f"Reject Null hypothesis: {p_value, zscore}->", H1)
else:
    print(f"Failed to reject: {p_value, zscore}->", H0)

Failed to reject: (0.05017412323114523, 1.6431676725155)-> Average drinks 1.7


#### A data scientist is looking at how a web application responds, with an average response time of 250 milliseconds and a standard deviation of 30 milliseconds. Find the critical value for a 96% confidence level.

In [28]:
mu = 250
st = 30

alpha = 0.04

left, right = zcritical(alpha, alternative="two_sided")

left_critical, right_critical = mu + (left * st), mu + (right * st)

left_critical, right_critical

(188.38753268104531, 311.61246731895466)

#### A marketing team aims to estimate the average time, visitors spend on their website. They gathered a random sample of 100 visitors and determined that the average time spent on the website was 4.5 minutes. The team is working under the assumption that the population's mean time spent on the website is 4.0 minutes, with a standard deviation of 1.2 minutes. Their goal is to estimate the true time spent on the website with a 95% confidence level. Calculate the confidence interval values and make a conclusion based on calculated interval

In [29]:
alpha = 0.05
sample_mean = 4.5
std_err = 1.2 / (100**0.5)
left_critical, right_critical = zcritical(alpha=alpha, alternative="two_sided")

(sample_mean + left_critical * std_err), (sample_mean + right_critical * std_err)

(4.264804321855194, 4.735195678144806)

It is known that the mean IQ of high school students is 100, and the standard deviation is 15.

A coaching institute claims that candidates who study there have more IQ than an average high school student. When the IQ of 50 candidates was calculated, the average turned out to be 110

Conduct an appropriate hypothesis test to test the institute’s claim, with a significance level of 5%

In [30]:
population_mean = 100
pop_std = 15

sample_mean = 110
sample_num = 110

H0 = "IQ is 100"
H1 = "IQ is > 100"

## we need to compare 2 sample mean

p_value, zscore = ztest(sample_mean, sample_num, population_mean, pop_std, alternative="greater")

if p_value < alpha:
    print(f"Reject Null hypothesis: {p_value, zscore}->", H1)
else:
    print(f"Failed to reject: {p_value, zscore}->", H0)

Reject Null hypothesis: (1.3543610677402285e-12, 6.99205898780101)-> IQ is > 100


When smokers smoke, nicotine is transformed into cotinine, which can be tested.

The average cotinine level in a group of 50 smokers was 243.5 ng ml.

Assuming that the standard deviation is known to be 229.5 ng ml.

Test the assertion that the mean cotinine level of all smokers is equal to 300.0 ng ml, at 95% confidence.

In [32]:
sample_num = 50
sample_mean = 243.5

population_mean = 300
population_std = 229.5

H0 = "mean cotinine level is 300"
H1 = "mean cotinine level is not equal to 300"
alpha = 0.05
p_value, zscore = ztest(sample_mean, sample_num, population_mean, population_std, alternative="two_sided")

if p_value < alpha:
    print(f"Reject Null hypothesis: {p_value, zscore}->", H1)
else:
    print(f"Failed to reject: {p_value, zscore}->", H0)

Failed to reject: (0.08171731915149638, -1.7408075440976007)-> mean cotinine level is 300
