In [11]:
import numpy as np

# Define the population mean under the null hypothesis
population_mean = 50

# Generate sample data (example)
np.random.seed(42)  # For reproducibility
sample_data = np.random.normal(loc=80, scale=0.4, size=30000)  # Sample mean is 73, std dev is 10, sample size is 30

# Calculate the observed sample mean
observed_mean = np.mean(sample_data)
observed_mean

# Calculate the observed test statistic (t-score)
sample_size = len(sample_data)
sample_std_dev = np.std(sample_data, ddof=1)
standard_error = sample_std_dev / np.sqrt(sample_size)
t_score_observed = (observed_mean - population_mean) / standard_error
t_score_observed

# Simulation: Shuffle data and calculate test statistics
num_simulations = 10000
simulated_means = np.zeros(num_simulations)

for i in range(num_simulations):
    shuffled_data = np.random.choice(sample_data, size=sample_size, replace=True)
    simulated_means[i] = np.mean(shuffled_data)

# Calculate test statistics for the shuffled data
simulated_t_scores = (simulated_means - population_mean) / standard_error

# Calculate the p-value
p_value = np.mean(np.abs(simulated_t_scores) >= np.abs(t_score_observed))
p_value

# Print the results
print(f"Observed mean: {observed_mean}")
print(f"Observed t-score: {t_score_observed}")
print(f"P-value: {p_value}")


Observed mean: 79.99985804010655
Observed t-score: 13007.649803637774
P-value: 0.4996


In [14]:
import numpy as np

# Define the population mean under the null hypothesis
population_mean = 70

# Generate sample data (example)
np.random.seed(42)  # For reproducibility
sample_data = np.random.normal(loc=73, scale=1, size=30)  # Sample mean is 73, std dev is 10, sample size is 30

# Calculate the observed sample mean and t-score
observed_mean = np.mean(sample_data)
sample_size = len(sample_data)
sample_std_dev = np.std(sample_data, ddof=1)
standard_error = sample_std_dev / np.sqrt(sample_size)
t_score_observed = (observed_mean - population_mean) / standard_error

# Simulation: Combine data under the null hypothesis
combined_data = sample_data - np.mean(sample_data) + population_mean

# Perform permutation test
num_simulations = 10000
permuted_t_scores = np.zeros(num_simulations)

for i in range(num_simulations):
    permuted_sample = np.random.choice(combined_data, size=sample_size, replace=True)
    permuted_mean = np.mean(permuted_sample)
    permuted_t_scores[i] = (permuted_mean - population_mean) / standard_error

# Calculate the p-value
p_value = np.mean(np.abs(permuted_t_scores) >= np.abs(t_score_observed))

# Print the results
print(f"Observed mean: {observed_mean}")
print(f"Observed t-score: {t_score_observed}")
print(f"P-value: {p_value}")

Observed mean: 72.81185310414895
Observed t-score: 17.11227083098026
P-value: 0.0
