In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import concurrent.futures
import pandas as pd
import itertools
import time

# Exercise 17: Simulate Gaussian profiles with multiple changepoints 

In [None]:
def simulate_mcp_data(n, tau, mu, sigma=1.0, random_seed=None):
    """
    Simulate i.i.d. Gaussian data with a piecewise-constant mean.

    Parameters:
      n           : int
                        Total number of observations.
      tau         : list or array-like
                        Changepoint positions. Each position must be an integer value in
                        {1, 2, ..., n-1}. The positions are sorted internally, so they may be
                        given in any order. An empty sequence means "no change in mean".
                        A position t ends one segment after t observations; the next segment
                        starts at 0-based index t. Duplicate positions are not rejected: a
                        repeated position yields an empty segment whose mean never appears
                        in the output.
      mu          : list or array-like
                        Mean value of each segment. The length must equal len(tau) + 1.
      sigma       : float (default=1.0)
                        Standard deviation of the noise (constant across all segments).
      random_seed : int or None (default=None)
                        If not None, it is passed to np.random.seed, which reseeds NumPy's
                        global legacy generator (this also affects later np.random calls).

    Returns:
      mu   : numpy array of shape (n,)
             The true mean vector (each segment filled with its corresponding mean).
      data : numpy array of shape (n,)
             The simulated data: true mean + Gaussian noise.

    Raises:
      ValueError : if a change position is outside {1, ..., n-1} or is not integer-valued,
                   or if len(mu) != len(tau) + 1.
    """
    if random_seed is not None:
        np.random.seed(random_seed)

    # Work on a sorted copy so the caller's sequence is never modified.
    tau = sorted(tau)
    if any(t < 1 or t >= n for t in tau):
        raise ValueError("Each change position must be in {1, 2, ..., n-1}.")
    if any(int(t) != t for t in tau):
        raise ValueError("Each change position must be an integer.")

    if len(mu) != len(tau) + 1:
        raise ValueError("The number of segment means must equal the number of change positions plus one.")

    # Build the segment boundaries with an explicit integer dtype. Concatenating with an
    # empty `tau` would otherwise promote the array to float64, and np.repeat refuses
    # float repeat counts, which made the no-changepoint case crash.
    boundaries      = np.array([0, *(int(t) for t in tau), n], dtype=int)
    segment_lengths = np.diff(boundaries)

    mean_vector = np.repeat(mu, segment_lengths)
    noise       = np.random.normal(loc=0, scale=sigma, size=n)
    data        = mean_vector + noise

    return mean_vector, data

In [None]:
# Example: a profile with five changepoints (six constant-mean segments)
n   = 200
tau = [10, 70, 100, 150, 180]
mu  = [0.0, 2.0, -1.0, 3.0, 0.0, 1.0]

# NOTE: `mu` is rebound here from the list of segment means to the
# length-n true mean vector returned by the simulator.
mu, y = simulate_mcp_data(n, tau, mu, sigma=1.0, random_seed=42)

# 1-based observation index for the horizontal axis
x = np.arange(1, n+1)

# Draw the noisy observations, the true mean, and a dashed line per changepoint
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(x, y, color='gray', alpha=0.7, label='Simulated data')
ax.step(x, mu, color='blue', linewidth=1, label='Mean vector')
for idx, cp in enumerate(tau):
    # Only the first vertical line carries a label so the legend shows one entry.
    ax.axvline(x=cp, color='red', linestyle='--', label='Changepoint' if idx == 0 else None)
ax.set_xlabel('Observation Index')
ax.set_ylabel('Value')
ax.set_title('Simulated Gaussian Data with Multiple Changepoints')
ax.legend()
plt.show()