In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pymc as pm

# ---------------------------------------------------------------------------
# MCMC-based correction of a corrupted synthetic sensor series.
#
# Steps: (1) simulate clean readings, (2) inject spikes / missing values at
# known positions, (3) fit a Normal(mu, sigma) model to the readings that are
# NOT flagged as corrupted, (4) replace every flagged reading with the
# posterior mean of mu, (5) save a comparison plot.
# ---------------------------------------------------------------------------

# Generate synthetic normal sensor data.
# The legacy global NumPy seed is kept so the simulated series is unchanged.
SEED = 42
np.random.seed(SEED)
n = 100
timestamps = pd.date_range("2025-01-01", periods=n, freq="s")
true_values = np.random.normal(loc=25, scale=0.5, size=n)

# Introduce corruption (spikes and missing values).
# Single source of truth: index -> additive offset. A NaN offset marks a
# missing reading (value + NaN is NaN), so the same mapping drives both the
# corruption step and the later replacement step.
corruption_offsets = {
    10: 8.0,     # spike
    25: -7.0,    # drop
    40: np.nan,  # missing
    60: 6.0,     # spike
    75: np.nan,  # missing
}
corrupted_idx = np.array(list(corruption_offsets))
corrupted_values = true_values.copy()
corrupted_values[corrupted_idx] += np.array(list(corruption_offsets.values()))

# Readings used for inference: drop NaNs AND every index flagged as corrupted.
# Filtering NaNs alone would leave the injected spikes in the likelihood,
# biasing mu and inflating sigma with values already known to be bad.
clean_mask = ~np.isnan(corrupted_values)
clean_mask[corrupted_idx] = False
clean_values = corrupted_values[clean_mask]

# MCMC Correction using a simple normal model
with pm.Model() as model:
    mu = pm.Normal("mu", mu=25, sigma=1)
    sigma = pm.HalfNormal("sigma", sigma=1)

    observed = pm.Normal("obs", mu=mu, sigma=sigma, observed=clean_values)

    # random_seed makes the posterior draws reproducible; the global NumPy
    # seed above is not passed to the sampler here.
    trace = pm.sample(
        1000,
        tune=500,
        chains=2,
        target_accept=0.95,
        return_inferencedata=False,
        progressbar=False,
        random_seed=SEED,
    )

# Impute using the posterior mean of mu (no model context needed from here on).
corrected_mean = trace["mu"].mean()

# Replace the manually identified corrupted/missing readings in one step.
corrected_values = corrupted_values.copy()
corrected_values[corrupted_idx] = corrected_mean

# Plotting
# NOTE(review): absolute, machine-specific output path; kept unchanged so the
# figure is still written to the same location. Consider a configurable dir.
FIGURE_PATH = "/home/rbakyayita/Documents/MScIS-thesis/mscis-thesis/figures/MCMC_Simulation.png"

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(timestamps, true_values, label="True (Original)", linestyle="--", color="green", alpha=0.7)
ax.plot(timestamps, corrupted_values, label="Corrupted", color="red", linewidth=1.5)
ax.plot(timestamps, corrected_values, label="MCMC Corrected", color="blue", linewidth=1.5)
ax.set_xlabel("Time")
ax.set_ylabel("Sensor Reading (°C)")
ax.set_title("MCMC Correction of Corrupted Time Series Data")
ax.legend()
ax.grid(True)
fig.tight_layout()
fig.savefig(FIGURE_PATH, dpi=300)
plt.close(fig)  # release the figure; it is only needed on disk

print(f"MCMC correction plot saved as '{FIGURE_PATH}'")


Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [mu, sigma]
Sampling 2 chains for 500 tune and 1_000 draw iterations (1_000 + 2_000 draws total) took 1 seconds.
We recommend running at least 4 chains for robust computation of convergence diagnostics


MCMC correction plot saved as '/home/rbakyayita/Documents/MScIS-thesis/mscis-thesis/figures/MCMC_Simulation.png'
