# 4.2 Hypothesis Testing Tutorial

This notebook covers key concepts in hypothesis testing including:
- Hypothesis Formulation
- Statistical Tests
- Experimental Design
- A/B Testing
- Power Analysis
- Results Analysis

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# Seed NumPy's global random number generator so the np.random.normal samples
# drawn later in the notebook are identical on every Restart & Run All
np.random.seed(42)

## 1. Hypothesis Formulation

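Let's demonstrate how to formulate and test hypotheses using a simulated example based on a realistic scenario: comparing two marketing strategies. The null hypothesis is $H_0: \mu_A = \mu_B$ (both strategies have the same mean outcome), and the two-sided alternative is $H_1: \mu_A \neq \mu_B$.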

In [None]:
# Generate sample data for two marketing strategies: 50 simulated observations
# per group with the same spread; strategy B is drawn with a mean 10 higher.
strategy_a = np.random.normal(
    loc=100, scale=15, size=50
)  # Conversion rate for strategy A
strategy_b = np.random.normal(
    loc=110, scale=15, size=50
)  # Conversion rate for strategy B

# Visualize the data.
# A wide-form DataFrame lets seaborn take the tick labels from the column
# names. `labels=` is not a seaborn parameter; it was forwarded to matplotlib,
# which rejects it for the boxes seaborn draws.
strategies = pd.DataFrame({"Strategy A": strategy_a, "Strategy B": strategy_b})
plt.figure(figsize=(10, 6))
sns.boxplot(data=strategies)
plt.title("Conversion Rates by Marketing Strategy")
plt.ylabel("Conversion Rate")
plt.show()

# Print summary statistics.
# ddof=1 reports the sample standard deviation, matching the ddof=1 variances
# used for Cohen's d later in the notebook.
print(
    "Strategy A - Mean: {:.2f}, SD: {:.2f}".format(
        np.mean(strategy_a), np.std(strategy_a, ddof=1)
    )
)
print(
    "Strategy B - Mean: {:.2f}, SD: {:.2f}".format(
        np.mean(strategy_b), np.std(strategy_b, ddof=1)
    )
)

## 2. Statistical Tests

Let's perform different types of statistical tests and interpret their results.

In [None]:
# Independent two-sample t-test with SciPy's defaults
# (equal variances assumed, two-sided alternative)
t_stat, p_value = stats.ttest_ind(strategy_a, strategy_b)
print(
    "Independent t-test:",
    f"t-statistic: {t_stat:.4f}",
    f"p-value: {p_value:.4f}",
    sep="\n",
)

# Mann-Whitney U test: the rank-based, non-parametric alternative,
# requested as two-sided so it is comparable with the t-test above
u_stat, p_value_mw = stats.mannwhitneyu(
    strategy_a, strategy_b, alternative="two-sided"
)
print(
    "\nMann-Whitney U test:",
    f"U-statistic: {u_stat:.4f}",
    f"p-value: {p_value_mw:.4f}",
    sep="\n",
)

## 3. A/B Testing

Let's simulate an A/B test and calculate important metrics.

In [None]:
def calculate_effect_size(group1, group2):
    """Calculate Cohen's d, the standardized difference between two group means.

    The difference ``mean(group2) - mean(group1)`` is divided by the pooled
    sample standard deviation, so a positive value means ``group2`` has the
    larger mean.

    Args:
        group1: Array-like of numeric observations for the first group.
        group2: Array-like of numeric observations for the second group.

    Returns:
        Cohen's d as a float. If both groups have zero variance the final
        division follows NumPy semantics and yields ``inf`` or ``nan``.

    Raises:
        ValueError: If either group has fewer than two observations, since
            the sample variance (``ddof=1``) is undefined in that case.
    """
    group1 = np.asarray(group1, dtype=float)
    group2 = np.asarray(group2, dtype=float)
    n1, n2 = group1.size, group2.size
    if n1 < 2 or n2 < 2:
        raise ValueError(
            "Cohen's d requires at least two observations in each group "
            f"(got {n1} and {n2})"
        )
    var1, var2 = np.var(group1, ddof=1), np.var(group2, ddof=1)
    # Pooled standard deviation: the degrees-of-freedom-weighted average of the
    # two sample variances, square-rooted (this is an SD, not a standard error).
    pooled_sd = np.sqrt(((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2))
    return (np.mean(group2) - np.mean(group1)) / pooled_sd


# Standardized difference between the two strategies (Cohen's d)
effect_size = calculate_effect_size(strategy_a, strategy_b)

# Percentage change of strategy B's mean relative to strategy A's mean
mean_a, mean_b = np.mean(strategy_a), np.mean(strategy_b)
relative_improvement = (mean_b - mean_a) / mean_a * 100

print(f"Effect Size (Cohen's d): {effect_size:.4f}")
print(f"Relative Improvement: {relative_improvement:.2f}%")

# Overlay the kernel density estimates of both samples on one set of axes
fig, ax = plt.subplots(figsize=(10, 6))
sns.kdeplot(data=strategy_a, label="Strategy A", ax=ax)
sns.kdeplot(data=strategy_b, label="Strategy B", ax=ax)
ax.set_title("Distribution of Conversion Rates")
ax.set_xlabel("Conversion Rate")
ax.set_ylabel("Density")
ax.legend()
plt.show()

## 4. Power Analysis

Let's perform a power analysis to determine the required sample size for future tests.

In [None]:
from scipy.stats import norm


def calculate_power(n, effect_size, alpha=0.05):
    """Approximate the power of a two-sided test using the normal distribution.

    Args:
        n: Sample size; it enters through the ``sqrt(n / 2)`` scaling of the
            effect size (presumably the per-group size of two equal groups).
        effect_size: Standardized effect size.
        alpha: Two-sided significance level. Defaults to 0.05.

    Returns:
        The power ``1 - beta``, where ``beta`` is the probability that the
        shifted statistic stays between the two critical values.
    """
    z_crit = norm.ppf(1 - alpha / 2)
    # Shift of the test statistic's mean under the alternative hypothesis
    shift = effect_size * np.sqrt(n / 2)
    below_upper = norm.cdf(z_crit - shift)
    below_lower = norm.cdf(-z_crit - shift)
    type_ii_error = below_upper - below_lower
    return 1 - type_ii_error


# Evaluate power on a grid of per-group sample sizes: 10, 20, ..., 190
sample_sizes = np.arange(10, 200, 10)
powers = [calculate_power(size, effect_size) for size in sample_sizes]

# Draw the power curve with a dashed reference line at 0.8
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(sample_sizes, powers)
ax.axhline(y=0.8, color="r", linestyle="--", label="0.8 Power Threshold")
ax.set_title("Power Analysis")
ax.set_xlabel("Sample Size (per group)")
ax.set_ylabel("Statistical Power")
ax.legend()
ax.grid(True)
plt.show()

## Practice Exercises

1. Conduct a one-way ANOVA test comparing three or more groups.

2. Perform a chi-square test of independence on categorical data.

3. Calculate and interpret confidence intervals for the difference between two means.

4. Design and analyze your own A/B test scenario with different metrics and success criteria.

5. Investigate how different effect sizes impact required sample sizes in power analysis.