In [1]:
#1 Question 6: Write a Python program to perform a one-sample Z-test and interpret the
# result for a given dataset.
import numpy as np
from scipy import stats
import math

def one_sample_z_test(sample, population_mean, population_std):
    """Run a two-tailed one-sample Z-test against a known population.

    Parameters
    ----------
    sample : sequence of numbers
        Observed values; must contain at least one element.
    population_mean : float
        Population mean under the null hypothesis.
    population_std : float
        Known population standard deviation; must be positive.

    Returns
    -------
    tuple
        ``(sample_mean, z_score, p_value)`` where ``p_value`` is two-tailed.

    Raises
    ------
    ValueError
        If ``sample`` is empty or ``population_std`` is not positive.
    """
    size = len(sample)
    if size == 0:
        raise ValueError("sample must contain at least one observation")
    if population_std <= 0:
        raise ValueError("population_std must be positive")

    mean = np.mean(sample)
    standard_error = population_std / math.sqrt(size)
    z = (mean - population_mean) / standard_error
    # Use the survival function rather than 1 - cdf: for large |z| the cdf
    # rounds to 1.0 and the subtraction would report a p-value of exactly 0.
    p = 2 * stats.norm.sf(abs(z))
    return mean, z, p


# Given dataset
data = [52, 50, 51, 49, 48, 52, 53, 50, 47, 51,
        49, 50, 52, 48, 51, 50, 49, 53, 52, 48,
        50, 49, 51, 52, 47, 50, 48, 49, 51, 50]

# Known population parameters
population_mean = 50
population_std = 2.5
alpha = 0.05   # Significance level

# Steps 1-3: sample statistics, Z-score and two-tailed p-value
n = len(data)
sample_mean, z_score, p_value = one_sample_z_test(data, population_mean, population_std)

# Step 4: Print results
print("Sample Mean:", sample_mean)
print("Z-Score:", z_score)
print("P-Value:", p_value)

# Step 5: Interpretation
if p_value < alpha:
    print("Reject the null hypothesis.")
    print("The sample mean is significantly different from the population mean.")
else:
    print("Fail to reject the null hypothesis.")
    print("The sample mean is NOT significantly different from the population mean.")



Sample Mean: 50.06666666666667
Z-Score: 0.14605934866805156
P-Value: 0.883874537256701
Fail to reject the null hypothesis.
The sample mean is NOT significantly different from the population mean.


In [None]:
#2 Question 8: Generate multiple samples from a non-normal distribution and implement
# the Central Limit Theorem using Python.
# Central Limit Theorem Demonstration

import numpy as np
import matplotlib.pyplot as plt

# Step 1: Create a non-normal distribution (Exponential)
# A seeded generator makes the draws identical on every Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)
population = rng.exponential(scale=2, size=100000)

# Step 2: Take multiple samples and calculate sample means
sample_size = 30
num_samples = 1000

# Draw every sample in one call: one row per sample, drawn with replacement
# from the population, then average across each row.
samples = rng.choice(population, size=(num_samples, sample_size))

# One mean per sample, stored as a list of NumPy floats.
sample_means = list(samples.mean(axis=1))

# Steps 3-4: one histogram per dataset -- the raw population first, then the
# sample means -- showing each figure before the next one is built.
histogram_specs = (
    (population, 50, "Original Non-Normal Distribution", "Value"),
    (sample_means, 30, "Distribution of Sample Means (CLT)", "Sample Mean"),
)
for values, bin_count, heading, x_label in histogram_specs:
    fig, ax = plt.subplots()
    ax.hist(values, bins=bin_count)
    ax.set_title(heading)
    ax.set_xlabel(x_label)
    ax.set_ylabel("Frequency")
    plt.show()

# Step 5: Print comparison of the population mean and the mean of sample means
population_average = np.mean(population)
sample_means_average = np.mean(sample_means)
print("Population Mean:", population_average)
print("Mean of Sample Means:", sample_means_average)



In [None]:
#4 Question 10: Perform a Chi-square goodness-of-fit test using Python to compare
# observed and expected distributions, and explain the outcome.
# Chi-Square Goodness-of-Fit Test

import numpy as np
from scipy.stats import chisquare

# Example category counts, and the counts expected if the same total were
# spread equally across the four categories.
observed = np.array([40, 35, 15, 10])
expected = np.array([25, 25, 25, 25])

# Significance level used for the decision at the end.
alpha = 0.05

# Chi-square goodness-of-fit test of observed against expected counts
chi_statistic, p_value = chisquare(observed, f_exp=expected)

# Report the inputs and the test result
for label, value in (
    ("Observed Frequencies:", observed),
    ("Expected Frequencies:", expected),
    ("Chi-Square Statistic:", chi_statistic),
    ("P-Value:", p_value),
):
    print(label, value)

# Decision: reject only when the p-value falls below the significance level
null_rejected = p_value < alpha
verdict = (
    (
        "Reject Null Hypothesis",
        "Observed distribution is significantly different from expected distribution.",
    )
    if null_rejected
    else (
        "Fail to Reject Null Hypothesis",
        "No significant difference between observed and expected distribution.",
    )
)
print(*verdict, sep="\n")



In [None]:
#3 Question 9: Write a Python function to calculate and visualize the confidence interval
# for a sample mean.

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

# Function to calculate confidence interval
def calculate_confidence_interval(data, confidence=0.95):
    """Compute a two-sided Student-t confidence interval for the mean of ``data``.

    Parameters
    ----------
    data : sequence of numbers
        Sample observations; at least two are needed so that the sample
        standard deviation (``ddof=1``) is defined.
    confidence : float, optional
        Confidence level, strictly between 0 and 1 (default 0.95).

    Returns
    -------
    tuple
        ``(mean, lower, upper, margin_of_error)`` where
        ``lower = mean - margin_of_error`` and
        ``upper = mean + margin_of_error``.

    Raises
    ------
    ValueError
        If ``confidence`` is not in the open interval (0, 1) or ``data``
        has fewer than two observations.
    """
    if not 0 < confidence < 1:
        raise ValueError("confidence must be strictly between 0 and 1")

    n = len(data)
    if n < 2:
        # With n < 2 the ddof=1 standard deviation and the t quantile
        # (df = n - 1) are undefined and would come back as NaN.
        raise ValueError("at least two observations are required")

    mean = np.mean(data)
    std_dev = np.std(data, ddof=1)   # sample standard deviation
    std_error = std_dev / np.sqrt(n)

    # Two-sided t critical value: upper quantile at (1 + confidence) / 2
    t_critical = stats.t.ppf((1 + confidence) / 2, df=n - 1)

    margin_of_error = t_critical * std_error

    lower = mean - margin_of_error
    upper = mean + margin_of_error

    return mean, lower, upper, margin_of_error

# Example dataset
data = [12, 15, 14, 10, 13, 16, 11, 14, 15, 13]

# Mean, interval bounds and half-width at the 95% confidence level
ci_result = calculate_confidence_interval(data, confidence=0.95)
mean, lower, upper, margin = ci_result

print("Sample Mean:", mean)
print("95% Confidence Interval:", (lower, upper))

# Visualization: a single point at x = 0 with an error bar spanning the
# interval; the x axis carries no information, so its ticks are hidden.
fig, ax = plt.subplots()
ax.errorbar(0, mean, yerr=margin, fmt="o", capsize=8)
ax.set_xlim(-1, 1)
ax.set_xticks([])
ax.set_ylabel("Sample Mean")
ax.set_title("95% Confidence Interval for Sample Mean")
plt.show()
