In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import concurrent.futures
import pandas as pd
import itertools

# Exercises 1 & 2: Simulate different scenarios (Gaussian & Student)

In [None]:
def simulate_amoc_data(n, tau, mu1, mu2, sigma1=1.0, sigma2=1.0, 
                  distribution='gaussian', df=None, random_seed=None):
    """
    Simulate a series of length n with at most one change, located at 'tau'.

    The first tau observations have mean mu1 and noise scale sigma1; the
    remaining n - tau observations have mean mu2 and noise scale sigma2.

    Parameters
    ----------
    n : int
        Total number of observations.
    tau : int
        Number of observations before the change; must lie in {1, ..., n-1}.
    mu1, mu2 : float
        Segment means before / after the change.
    sigma1, sigma2 : float
        Noise scale before / after the change (passed as `scale` to the sampler).
    distribution : {'gaussian', 't'}
        Noise family.
    df : float, optional
        Degrees of freedom, required when distribution == 't'.
    random_seed : int, optional
        If given, the global NumPy RNG is re-seeded with it before sampling.

    Returns
    -------
    (mu, data) : tuple of np.ndarray
        The piecewise-constant mean vector and the noisy observations.

    Raises
    ------
    ValueError
        If tau is outside {1, ..., n-1}, if df is missing for 't', or if the
        distribution name is not recognised.
    """
    # Seeding happens first, so it takes effect even if validation fails below.
    if random_seed is not None:
        np.random.seed(random_seed)
    if tau < 1 or n <= tau:
        raise ValueError("tau must be in {1, 2, ..., n-1}.")

    segment_lengths = (tau, n - tau)
    segment_scales = (sigma1, sigma2)

    # Piecewise-constant signal: mu1 repeated tau times, then mu2.
    mu = np.repeat([mu1, mu2], segment_lengths)

    # Draw the first segment before the second so the random stream is
    # consumed in a fixed order.
    if distribution == 'gaussian':
        pieces = [
            np.random.normal(loc=0, scale=scale, size=length)
            for scale, length in zip(segment_scales, segment_lengths)
        ]
    elif distribution == 't':
        if df is None:
            raise ValueError("df must be provided for Student-t distribution.")
        pieces = [
            stats.t.rvs(df, loc=0, scale=scale, size=length)
            for scale, length in zip(segment_scales, segment_lengths)
        ]
    else:
        raise ValueError("Unknown distribution type.")

    return mu, mu + np.concatenate(pieces)

In [None]:
# Three stacked panels, one per simulated scenario: true mean (blue step),
# observations (grey) and the changepoint location (dashed red line).
# NOTE(review): x, tau, mu_case1..3 and y_case1..3 are not defined in this
# cell; they are expected to come from an earlier cell - confirm that the
# notebook still runs from a fresh kernel.
fig, axes = plt.subplots(3, 1, figsize=(8, 12))

scenarios = [
    ("A. Change in Mean Only",         mu_case1, y_case1),
    ("B. Change in Variance Only",     mu_case2, y_case2),
    ("C. Change in Mean and Variance", mu_case3, y_case3),
]

for panel, (ax, (title, mean_vec, series)) in enumerate(zip(axes, scenarios)):
    # Only the top panel carries legend entries; the colours mean the same
    # thing in the two panels below it.
    with_legend = panel == 0
    ax.step(x, mean_vec, color="blue", linewidth=1,
            label="Mean vector" if with_legend else None)
    ax.plot(x, series, color="grey", alpha=0.7,
            label="Simulated data" if with_legend else None)
    ax.axvline(x=tau, color="red", linestyle="--",
               label=f"Changepoint (τ={tau})" if with_legend else None)
    ax.set_title(title)
    ax.set_ylabel("y")
    if with_legend:
        ax.legend()

# The time axis is labelled on the bottom panel only.
axes[-1].set_xlabel("t")

fig.tight_layout()
plt.show()

# Exercise 3: Bonferroni Testing (Gaussian)

In [None]:
def amoc_t_test_bonferroni(y, alpha):
    """
    Detect at most one changepoint with Bonferroni-corrected two-sample t-tests.

    For every split i in 1..n-1, y[:i] is compared with y[i:] using
    scipy.stats.ttest_ind with its default settings. The split with the
    smallest p-value is reported if that p-value is below alpha / (n - 1).

    Parameters
    ----------
    y : array-like of float
        Observed series.
    alpha : float
        Family-wise significance level; each of the n - 1 candidate splits is
        tested at level alpha / (n - 1).

    Returns
    -------
    int or float
        The 1-indexed changepoint, i.e. the number of observations before the
        change, or np.nan when no split is significant, when no p-value is
        defined, or when the series has fewer than two observations.
    """
    n = len(y)
    if n < 2:
        # No admissible split exists, so no changepoint can be reported.
        return np.nan

    # One t-test per candidate split.
    pvals = np.array(
        [stats.ttest_ind(y[:i], y[i:]).pvalue for i in range(1, n)],
        dtype=float,
    )

    # A p-value can be NaN (e.g. a segment whose variance is undefined or a
    # 0/0 statistic). np.argmin would return the position of the first NaN and
    # mask every valid split, so undefined p-values are ignored here.
    if np.isnan(pvals).all():
        return np.nan
    best = np.nanargmin(pvals)

    if pvals[best] < alpha / (n - 1):
        return best + 1  # changepoint index (1-indexed)
    return np.nan

def one_simu_H0_bonf(param):
    """
    Estimate the false positive rate of the Bonferroni procedure under H0.

    Simulates param['rep'] series of length param['n'] with no change (both
    segment means 0, both scales 1), applies amoc_t_test_bonferroni at level
    param['alpha'] to each, and returns the fraction of series in which a
    changepoint was reported.

    Parameters
    ----------
    param : dict
        Requires keys 'n', 'rep', 'alpha' and 'distribution'; 'df' is optional
        and is forwarded to the simulator.

    Returns
    -------
    float
        Proportion of replications with a (false) detection.
    """
    n, rep, alpha, distribution = param['n'], param['rep'], param['alpha'], param['distribution']
    df = param.get('df', None)

    def detect_once():
        # tau=1 only satisfies the simulator's range check: with equal means
        # and equal scales the two segments are indistinguishable.
        series = simulate_amoc_data(n, 1, 0, 0, 1, 1, distribution, df)[1]
        return amoc_t_test_bonferroni(series, alpha)

    # Gather every estimate first and build the float array in one go.
    tau_hats = np.array([detect_once() for _ in range(rep)], dtype=float)
    return np.mean(~np.isnan(tau_hats))

def one_simu_H1_bonf(param):
    """
    Estimate the power of the Bonferroni procedure under H1 (change in mean).

    Simulates param['rep'] series of length param['n'] whose mean moves from 0
    to param['delta'] after param['tau'] observations (both scales 1), applies
    amoc_t_test_bonferroni at level param['alpha'] to each, and returns the
    fraction of series in which a changepoint was reported. The reported
    location is not compared with the true tau.

    Parameters
    ----------
    param : dict
        Requires keys 'n', 'tau', 'rep', 'alpha', 'delta' and 'distribution';
        'df' is optional and is forwarded to the simulator.

    Returns
    -------
    float
        Proportion of replications with a detection.
    """
    n, tau, rep, alpha, delta, distribution = param['n'], param['tau'], param['rep'], param['alpha'], param['delta'], param['distribution']
    df = param.get('df', None)

    def detect_once():
        series = simulate_amoc_data(n, tau, 0, delta, 1, 1, distribution, df)[1]
        return amoc_t_test_bonferroni(series, alpha)

    # Gather every estimate first and build the float array in one go.
    tau_hats = np.array([detect_once() for _ in range(rep)], dtype=float)
    return np.mean(~np.isnan(tau_hats))

In [None]:
nb_thr = 10  # number of parallel workers
rep    = 100
n      = 2**10
alpha  = 0.05
master_seed = 12345  # single knob that makes the whole cell reproducible


def _run_seeded_H1_bonf(task):
    """Seed the worker's global NumPy RNG, then run one H1 simulation."""
    task_seed, param = task
    np.random.seed(task_seed)
    return one_simu_H1_bonf(param)


################################################################################
# Simulation under H0

np.random.seed(master_seed)
param_H0_bonf = {'n': n, 'rep': rep, 'alpha': alpha, 'distribution': "gaussian"}
fpr_bonf      = one_simu_H0_bonf(param_H0_bonf)
print("Bonferroni false positive rate (H0):", fpr_bonf)

################################################################################
# Simulation under H1

tau_list = [2**i for i in range(1, 10)]
# For Gaussian, delta is set to np.sqrt(70/n) for effect size
delta    = np.sqrt(70 / n)

params_bonf = pd.DataFrame(
    itertools.product([n], [alpha], [rep], tau_list, [delta], ['gaussian']),
    columns = ['n', 'alpha', 'rep', 'tau', 'delta', 'distribution']
)
h1_tasks = params_bonf.to_dict(orient='records')

# Worker processes created by fork start from a copy of the parent's global
# NumPy RNG state, so without explicit seeding every tau configuration would
# be simulated on the same noise sequence. Each task therefore gets its own
# 32-bit seed derived from the master seed.
task_seeds = [
    int(s) for s in np.random.SeedSequence(master_seed).generate_state(len(h1_tasks))
]

# One H1 power estimate per row of params_bonf, computed in parallel.
with concurrent.futures.ProcessPoolExecutor(max_workers=nb_thr) as executor:
    params_bonf['TPR'] = np.array(list(executor.map(
        _run_seeded_H1_bonf,
        zip(task_seeds, h1_tasks)
    )))
print(params_bonf)