In [1]:
# Importing necessary libraries
import numpy as np
import scipy.stats as stats


In [4]:
# Q1: Difference between t-test and z-test
# A t-test is used when the population standard deviation is unknown and has to be estimated from the sample; this matters most when the sample size is small (n < 30).
# A z-test is used when the population standard deviation is known, or when the sample size is large (n ≥ 30) so that the normal approximation is adequate.
# Example scenario for t-test: A researcher wants to test if the average weight of a sample of 25 students is different
# from the population mean weight of 60 kg.
# Example scenario for z-test: A researcher wants to test if the average height of a large sample of 1000 people is different
# from the population mean height of 170 cm.


In [5]:
# Q2: One-tailed vs. Two-tailed tests
# A one-tailed test checks for a difference in one pre-specified direction only: either that the sample mean is greater than the population mean, or that it is less than it.
# A two-tailed test checks if the sample mean is significantly different from the population mean, without specifying the direction.


In [7]:
# Q3: Type 1 and Type 2 errors in hypothesis testing
# Type 1 error: False positive. Rejecting the null hypothesis when it is actually true.
# Type 2 error: False negative. Failing to reject the null hypothesis when it is actually false.
# Example Type 1 error: A test shows that a new drug is effective when it is actually not.
# Example Type 2 error: A test fails to show that a new drug is effective when it actually is.

In [10]:
# Q4: Bayes' theorem
# Bayes' theorem updates the probability of a hypothesis H once evidence E has been observed.
# Formula: P(H|E) = (P(E|H) * P(H)) / P(E)
# Example: the prior probability that a person has a disease is 0.1 (P(H) = 0.1), the test is
# positive for 95% of people who have the disease (P(E|H) = 0.95), and the overall probability
# of a positive test result is 0.2 (P(E) = 0.2).
# Question: what is the probability of disease given a positive test result?

# Inputs, in order: prior P(H), likelihood P(E|H), probability of the evidence P(E)
P_H, P_E_given_H, P_E = 0.1, 0.95, 0.2

# Posterior = likelihood * prior, normalised by the probability of the evidence
P_H_given_E = (P_E_given_H * P_H) / P_E
P_H_given_E  # Probability of disease given positive test result


0.475

In [11]:
# Q5: Confidence interval
# A confidence interval is a range of values that is likely to contain the true population
# parameter with a stated level of confidence.
# Formula: CI = sample mean ± (Z or t score * standard error)
#
# Example: a sample has mean 50, standard deviation 5 and size 25; compute the 95% interval.
# NOTE(review): a normal (Z) critical value is used below. If 5 is a sample standard deviation,
# a t critical value with n - 1 degrees of freedom would give a slightly wider interval - confirm.

mean, std_dev, n = 50, 5, 25
confidence_level = 0.95

# Standard error of the mean
std_error = std_dev / np.sqrt(n)

# Two-sided critical value: leave (1 - confidence_level) / 2 in each tail of the normal curve
z_value = stats.norm.ppf(1 - (1 - confidence_level) / 2)

# The interval is symmetric about the mean, so compute the half-width once
margin_of_error = z_value * std_error
CI_lower, CI_upper = mean - margin_of_error, mean + margin_of_error
CI_lower, CI_upper  # Confidence interval


(np.float64(48.04003601545995), np.float64(51.95996398454005))

In [12]:
# Q6: Bayes' theorem application (sample problem)
# Problem: a company hires a candidate with probability 0.2 (P(H) = 0.2); a hired candidate
# had a successful interview with probability 0.9 (P(E|H) = 0.9); and the overall interview
# success rate is 0.5 (P(E) = 0.5).

# Inputs, in order: prior P(H), likelihood P(E|H), probability of the evidence P(E)
P_H, P_E_given_H, P_E = 0.2, 0.9, 0.5

# Posterior via Bayes' theorem: P(H|E) = P(E|H) * P(H) / P(E)
P_H_given_E = (P_E_given_H * P_H) / P_E
P_H_given_E  # Probability of hiring given a successful interview

0.36000000000000004

In [13]:
# Q7: 95% confidence interval for a sample with mean 50 and standard deviation 5
# The sample size is not given by the problem, so n = 30 is used as an example value.
mean, std_dev = 50, 5
n = 30  # Example sample size

# Standard error of the mean and the two-sided 95% normal critical value
std_error = std_dev / np.sqrt(n)
z_value = stats.norm.ppf(1 - (1 - 0.95) / 2)

# Half-width of the interval, applied symmetrically around the mean
margin_of_error = z_value * std_error
CI_lower, CI_upper = mean - margin_of_error, mean + margin_of_error
CI_lower, CI_upper  # Confidence interval

(np.float64(48.210805856282846), np.float64(51.789194143717154))

In [14]:
# Q8: Margin of error and sample size effect
# The margin of error (ME) decreases as the sample size increases, reducing the uncertainty.
# Formula: ME = Z * (std_dev / sqrt(n))
#
# Example: a larger sample (n=1000) gives a smaller margin of error than a smaller one (n=50).

# Define the inputs in this cell instead of relying on values left in the kernel by an earlier
# cell: std_dev and z_value are both reassigned elsewhere in the notebook, so re-running this
# cell out of order would otherwise silently change the result.
std_dev = 5  # Standard deviation (same value as the Q7 example)
z_value = stats.norm.ppf(1 - (1 - 0.95) / 2)  # Two-sided 95% normal critical value

n_1 = 50
n_2 = 1000

# Standard error of the mean for each sample size
std_error_1 = std_dev / np.sqrt(n_1)
std_error_2 = std_dev / np.sqrt(n_2)

# Margin of error = critical value * standard error
margin_of_error_1 = z_value * std_error_1
margin_of_error_2 = z_value * std_error_2

margin_of_error_1, margin_of_error_2  # Margin of error for both sample sizes

(np.float64(1.3859038243496777), np.float64(0.3098975161522808))

In [15]:
# Q9: Z-score calculation for a data point
# The z-score expresses how many standard deviations a value lies from the mean.
value, mean, std_dev = 75, 70, 5

# Distance from the mean, then scaled by the standard deviation
deviation = value - mean
z_score = deviation / std_dev
z_score  # Z-score

1.0

In [16]:
# Q10: t-test for drug effectiveness
# Null hypothesis: The drug has no effect, so the mean weight loss is 0.
# t = (sample mean - population mean) / (sample std deviation / sqrt(sample size))

mean_weight_loss = 6
std_dev_loss = 2.5
n = 50
population_mean = 0  # Null hypothesis

t_statistic = (mean_weight_loss - population_mean) / (std_dev_loss / np.sqrt(n))

# t-value for 95% confidence level and 49 degrees of freedom
t_critical = stats.t.ppf(0.975, df=n-1)

# Check if t_statistic is greater than t_critical to reject null hypothesis
t_statistic, t_critical

(np.float64(16.970562748477143), np.float64(2.0095752371292397))

In [17]:
# Q11: 95% confidence interval for job satisfaction
p_hat, n = 0.65, 500  # Proportion of satisfied people, sample size

# Standard error of a sample proportion: sqrt(p_hat * (1 - p_hat) / n)
proportion_variance = (p_hat * (1 - p_hat)) / n
std_error = np.sqrt(proportion_variance)

# Two-sided normal critical value for 95% confidence
z_value = stats.norm.ppf(1 - (1 - 0.95) / 2)

# Half-width of the interval, applied symmetrically around p_hat
margin_of_error = z_value * std_error
CI_lower, CI_upper = p_hat - margin_of_error, p_hat + margin_of_error
CI_lower, CI_upper  # Confidence interval for the proportion

(np.float64(0.6081925393809212), np.float64(0.6918074606190788))

In [18]:
# Q12: t-test for two independent samples (teaching methods)
# Summary statistics for each group: mean, standard deviation, sample size
mean_A, std_dev_A, n_A = 85, 6, 30
mean_B, std_dev_B, n_B = 82, 5, 30

# Pooled standard deviation: each group's variance weighted by its degrees of freedom
dof = n_A + n_B - 2
pooled_variance = ((n_A - 1) * std_dev_A**2 + (n_B - 1) * std_dev_B**2) / dof
pooled_std_dev = np.sqrt(pooled_variance)

# t statistic: difference in means over the standard error of that difference
t_statistic = (mean_A - mean_B) / (pooled_std_dev * np.sqrt(1/n_A + 1/n_B))

# Two-sided critical value at a significance level of 0.01
t_critical = stats.t.ppf(1 - 0.01 / 2, df=dof)
t_statistic, t_critical  # Compare t-statistic with t-critical

(np.float64(2.10386061995483), np.float64(2.663286953537658))

In [19]:
# Q13: 90% confidence interval for population mean
sample_mean = 65
population_mean = 60  # Given in the problem; not used in the interval calculation below
std_dev, n = 8, 50

# Standard error of the mean and the two-sided 90% normal critical value
std_error = std_dev / np.sqrt(n)
z_value = stats.norm.ppf(1 - (1 - 0.90) / 2)

# Half-width of the interval, applied symmetrically around the sample mean
margin_of_error = z_value * std_error
CI_lower, CI_upper = sample_mean - margin_of_error, sample_mean + margin_of_error
CI_lower, CI_upper  # Confidence interval

(np.float64(63.13906055411732), np.float64(66.86093944588268))

In [20]:
# Q14: t-test for caffeine effect on reaction time
mean_reaction_time, std_dev_reaction_time = 0.25, 0.05
n = 30
population_mean = 0  # Mean under the null hypothesis

# Standard error of the sample mean, then the one-sample t statistic
standard_error = std_dev_reaction_time / np.sqrt(n)
t_statistic = (mean_reaction_time - population_mean) / standard_error

# Critical value at the 0.95 quantile with n - 1 degrees of freedom
# (the cut-off for a two-sided 90% confidence level)
t_critical = stats.t.ppf(0.95, df=n - 1)
t_statistic, t_critical  # Compare t-statistic with t-critical

(np.float64(27.386127875258307), np.float64(1.6991270265334972))