In [1]:
# ONE-TAILED TEST (Left-Tailed)
# Hypothesis-testing example: evaluating a new medicine

# A pharmaceutical company claims that their new medicine reduces fever duration.
# Historically, the average fever duration is 16 weeks.

# Null Hypothesis (H0): Mean fever duration is at least 16 weeks (mu >= 16)
# Alternative Hypothesis (H1): Mean fever duration is less than 16 weeks (mu < 16)

# With these hypotheses we run a one-sample, left-tailed Z-test to determine whether
# the new medicine significantly shortens the mean fever duration.

In [2]:
# Import necessary libraries for statistical analysis
import numpy as np  # For numerical computations
from scipy.stats import norm  # For normal distribution-related functions

# These libraries will help with numerical operations and statistical hypothesis testing.

In [3]:
# Sample size: number of simulated patients in the study (1,000)
n = 1_000

# Seed NumPy's global random generator so every run draws the same values
np.random.seed(42)

In [4]:
# Draw a reference sample from a normal distribution:
#   loc   = 16 -> mean (mu), the center of the distribution
#   scale = 3  -> standard deviation (sigma), the spread of the data
#   size  = n  -> reuse the sample size defined earlier instead of repeating
#                 the literal 1000, so the two cannot drift apart
# NOTE: this call advances NumPy's global random state, so the values produced
# by any later np.random draw depend on this line having run first.
random_data = np.random.normal(loc=16, scale=3, size=n)

# Show the first 10 generated values as a quick sanity check
print(random_data[:10])

[17.49014246 15.5852071  17.94306561 20.56908957 15.29753988 15.29758913
 20.73763845 18.30230419 14.59157684 17.62768013]


In [5]:
# Simulate the fever recovery durations observed under the new medicine.
# Arguments to np.random.normal, in order:
#   15.8 -> assumed mean recovery duration under the new treatment (weeks)
#   3    -> standard deviation, i.e. variability in recovery times
#   n    -> total number of simulated samples
sample_data = np.random.normal(15.8, 3, n)

# Peek at the first 10 simulated recovery times
print(sample_data[0:10])

[19.99806631 18.57390105 15.97889111 13.85918967 17.89466994 16.98045616
 18.48557966 17.70551541 18.94865815 14.19429437]


In [6]:
# Hypothesized mean under the null hypothesis (H0): an average fever
# recovery duration of 16 weeks. This is the reference value the sample
# mean is compared against in the hypothesis test.
mu0 = 16

In [7]:
# Summary statistics of the sample that feed the test statistic
sample_mean = sample_data.mean()  # arithmetic mean of the sample
sample_std = sample_data.std(ddof=1)  # ddof=1 -> divide by n - 1 (Bessel's correction)
se = sample_std / np.sqrt(n)  # standard error of the mean: s / sqrt(n)

# Report the three statistics for inspection
print("Sample Mean:", sample_mean)
print("Sample Standard Deviation:", sample_std)
print("Standard Error:", se)

Sample Mean: 16.01250871174747
Sample Standard Deviation: 2.9923631316822625
Standard Error: 0.0946268308243031


In [8]:
# One-tailed (left-tailed) Z-test: is the sample mean significantly below mu0?

# Test statistic: distance of the sample mean from mu0, measured in standard errors
z_stat = (sample_mean - mu0) / se

# Left-tail p-value: P(Z <= z_stat) for a standard normal Z
p_value = norm.cdf(z_stat)

# Report the results for interpretation
print("Z-Statistic:", z_stat)
print("P-Value:", p_value)

# Decision at the 5% significance level:
#   p_value <= 0.05 -> reject H0 (sample mean is significantly lower)
#   p_value >  0.05 -> fail to reject H0 (no significant evidence it is lower)
print(
    "Reject H0: Fever duration is significantly shorter than 16 weeks."
    if p_value <= 0.05
    else "Fail to reject H0: No significant evidence that fever duration is shorter than 16 weeks."
)

Z-Statistic: 0.13218990468669334
P-Value: 0.5525829570294422
Fail to reject H0: No significant evidence that fever duration is shorter than 16 weeks.


In [9]:
# Significance levels (alpha) at which the left-tailed test is evaluated
alphas = [0.01, 0.05, 0.10]

# Table header; the separator is sized from the header so the two always line up
header = f"{'Alpha':<8}{'Z-Stat':>10}{'P-Value':>12}{'Critical z':>15}{'Decision':>15}"
print(header)
print("-" * len(header))

# For each alpha, compute the lower-tail critical value and the test decision
for alpha in alphas:
    # norm.ppf(alpha) is the alpha-quantile of the standard normal, i.e. the
    # boundary of the left-tail rejection region
    z_critical = norm.ppf(alpha)
    # Reject when z_stat <= z_critical. With p_value = norm.cdf(z_stat) this is
    # the same rule as p_value <= alpha, including the boundary case (the
    # previous strict "<" disagreed with that rule at exact equality).
    decision = "Reject H0" if z_stat <= z_critical else "Fail to Reject"
    print(f"{alpha:<8.2f}{z_stat:>10.4f}{p_value:>12.4f}{z_critical:>15.4f}{decision:>15}")

Alpha       Z-Stat     P-Value     Critical z       Decision
------------------------------------------------------------
0.01        0.1322      0.5526        -2.3263 Fail to Reject
0.05        0.1322      0.5526        -1.6449 Fail to Reject
0.10        0.1322      0.5526        -1.2816 Fail to Reject
