# TSRBench Quick Start

This notebook demonstrates how to inject realistic corruptions into any time series using TSRBench.

TSRBench uses **Extreme Value Theory (EVT)** to calibrate noise amplitudes, so injected corruptions respect the statistical extremes of your data.

In [None]:
# pip install tsrbench
from tsrbench import CollectiveNoise
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## 1. Corrupt a 1D Signal

The simplest use case: a single time series as a 1D numpy array.

`corrupt()` handles standardization, noise injection (EVT-calibrated), and inverse-transformation internally.

In [None]:
# Build a reproducible test signal: slow sine + faster, weaker sine + white noise
np.random.seed(42)
t = np.linspace(0, 50, num=5000)
clean = np.sin(t) + 0.3 * np.sin(5 * t)
signal = clean + 0.1 * np.random.randn(t.size)

# Run the high-level corruption entry point at severity level 3
cn = CollectiveNoise(seed=2025)
results = cn.corrupt(signal, noise_level=3)

# results is keyed by corruption type; 'shift' is inspected as a representative entry
shift_result = results['shift']
print("Output keys:", list(results.keys()))
print("Each value shape:", shift_result.shape)

In [None]:
# Overlay every corruption type on the original signal, one panel per type
panels = [
    ('shift', 'Level Shift', '#F44336'),
    ('spike', 'Exponential Spike', '#FF9800'),
    ('impulse', 'Impulse', '#9C27B0'),
    ('gaussian', 'Gaussian', '#4CAF50'),
    ('missing', 'Missing', '#607D8B'),
    ('combined', 'Combined', '#E91E63'),
]

fig, axes = plt.subplots(3, 2, figsize=(16, 10), sharex=True, sharey=True)

# axes.flat walks the 3x2 grid row by row, so panels fill left-to-right, top-to-bottom
for ax, (ntype, title, color) in zip(axes.flat, panels):
    ax.plot(signal, color='#2196F3', linewidth=0.5, alpha=0.5, label='Original')
    ax.plot(results[ntype], color=color, linewidth=0.5, alpha=0.8, label=title)
    ax.set_title(title, fontweight='bold')
    ax.legend(loc='upper right', fontsize=8)

# Only the bottom row needs an x-label because the x-axis is shared
for bottom_ax in axes[-1]:
    bottom_ax.set_xlabel('Time Step')

fig.suptitle('TSRBench: 6 Corruption Types (Level 3)', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

## 2. Corrupt a Multi-Column DataFrame

In practice, time series datasets have multiple columns (e.g., temperature, humidity, pressure) and a date/index column.

`corrupt()` handles this directly — pass a DataFrame and set `skip_first_col=True` to exclude the date column.

In [None]:
# Synthetic hourly weather table: a timestamp column followed by three noisy sensor channels
np.random.seed(42)
n = 5000
timestamps = pd.date_range('2020-01-01', periods=n, freq='h')

# The random draws happen in column order (temperature, humidity, pressure);
# keep this order so the seeded values stay reproducible.
temperature = 20 + 10 * np.sin(np.linspace(0, 20, n)) + np.random.randn(n)
humidity = 60 + 5 * np.cos(np.linspace(0, 15, n)) + 2 * np.random.randn(n)
pressure = 1013 + 3 * np.sin(np.linspace(0, 10, n)) + 0.5 * np.random.randn(n)

df = pd.DataFrame({
    'date': timestamps,
    'temperature': temperature,
    'humidity': humidity,
    'pressure': pressure,
})

print(df.head())
print(f"Shape: {df.shape}")

In [None]:
# Corrupt the DataFrame; skip_first_col=True excludes the leading 'date' column from corruption
cn = CollectiveNoise(seed=2025)
results = cn.corrupt(df, noise_level=3, skip_first_col=True)

# Inspect the level-shift variant as a representative output (a DataFrame for DataFrame input)
shifted = results['shift']
print("Output keys:", list(results.keys()))
print("Type:", type(shifted))
print("Date column preserved:", (shifted['date'] == df['date']).all())
print()
print(shifted.head())

In [None]:
# Show each corruption type for a single column of the DataFrame results
col = 'temperature'
panels = [
    ('shift', 'Level Shift', '#F44336'),
    ('spike', 'Exponential Spike', '#FF9800'),
    ('impulse', 'Impulse', '#9C27B0'),
    ('gaussian', 'Gaussian', '#4CAF50'),
    ('missing', 'Missing', '#607D8B'),
    ('combined', 'Combined', '#E91E63'),
]

fig, axes = plt.subplots(3, 2, figsize=(16, 10), sharex=True)

original_values = df[col].values
# axes.flat walks the 3x2 grid row by row, matching the panel order above
for ax, (ntype, title, color) in zip(axes.flat, panels):
    corrupted_values = results[ntype][col].values
    ax.plot(original_values, color='#2196F3', linewidth=0.5, alpha=0.5, label='Original')
    ax.plot(corrupted_values, color=color, linewidth=0.5, alpha=0.8, label=title)
    ax.set_title(f'{title} — {col}', fontweight='bold')
    ax.legend(loc='upper right', fontsize=8)

# Only the bottom row needs an x-label because the x-axis is shared
for bottom_ax in axes[-1]:
    bottom_ax.set_xlabel('Time Step')

fig.suptitle('Multi-Column DataFrame Corruption (Level 3)', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

## 3. Comparing Severity Levels

TSRBench provides 5 severity levels. Higher levels produce more frequent, longer, and more extreme corruptions. The plot below shows the `combined` output at each level.

In [None]:
# Compare the combined corruption across all 5 severity levels, one row per level
levels = range(1, 6)
fig, axes = plt.subplots(5, 1, figsize=(16, 12), sharex=True, sharey=True)

for ax, level in zip(axes, levels):
    # A new instance with the same seed is created for every level
    cn = CollectiveNoise(seed=2025)
    out = cn.corrupt(signal, noise_level=level)
    ax.plot(signal, color='#2196F3', linewidth=0.5, alpha=0.4, label='Original')
    ax.plot(out['combined'], color='#E91E63', linewidth=0.5, alpha=0.8, label=f'Level {level}')
    ax.set_title(f'Severity Level {level}', fontsize=11, fontweight='bold', loc='left')
    ax.legend(loc='upper right', fontsize=8)

axes[-1].set_xlabel('Time Step')
fig.suptitle('Combined Corruption Across Severity Levels', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

## 4. 2D Numpy Array Input

`corrupt()` also accepts 2D numpy arrays directly (each column = one time series).

In [None]:
# Three synthetic channels stacked column-wise into a (3000, 3) array
np.random.seed(42)
# Draw order (sine, cosine, random walk) is kept so the seeded values stay reproducible
sine_channel = np.sin(np.linspace(0, 20, 3000)) + 0.1 * np.random.randn(3000)
cosine_channel = np.cos(np.linspace(0, 15, 3000)) * 2 + 0.2 * np.random.randn(3000)
walk_channel = np.random.randn(3000).cumsum() * 0.02
X = np.column_stack([sine_channel, cosine_channel, walk_channel])
print("Input shape:", X.shape)

# Every column is a series here, so nothing is skipped (no date column)
cn = CollectiveNoise(seed=2025)
results = cn.corrupt(X, noise_level=3, skip_first_col=False)

shift_result = results['shift']
print("Output type:", type(shift_result))
print("Output shape:", shift_result.shape)

In [None]:
# One row per channel: original column vs. its combined-corruption counterpart
channel_names = ['Channel 0 (sine)', 'Channel 1 (cosine)', 'Channel 2 (random walk)']
fig, axes = plt.subplots(3, 1, figsize=(16, 8), sharex=True)

combined = results['combined']
for idx, (ax, name) in enumerate(zip(axes, channel_names)):
    ax.plot(X[:, idx], color='#2196F3', linewidth=0.5, alpha=0.5, label='Original')
    ax.plot(combined[:, idx], color='#E91E63', linewidth=0.5, alpha=0.8, label='Combined')
    ax.set_title(name, fontweight='bold', loc='left')
    ax.legend(loc='upper right', fontsize=8)

axes[-1].set_xlabel('Time Step')
fig.suptitle('2D Array: Combined Corruption (Level 3)', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

## 5. Low-Level API

For finer control, you can use individual injection methods on **standardized** 1D signals.

These methods return the **noise component** (not the corrupted signal), so you add it to your standardized signal manually, e.g. `signal_std + shift_noise`.

In [None]:
# Low-level API demo: standardize the 1D `signal`, then call each injection
# method on one seeded CollectiveNoise instance and report how many samples
# each noise component touches.
from sklearn.preprocessing import StandardScaler

# Standardize first (required for low-level API).
# StandardScaler expects 2D input, hence reshape(-1, 1); ravel() restores the 1D shape.
scaler = StandardScaler()
signal_std = scaler.fit_transform(signal.reshape(-1, 1)).ravel()

# All calls below share this single instance, so statement order matters:
# NOTE(review): presumably each call advances the instance's RNG state — confirm
# before reordering, as results may change.
cn = CollectiveNoise(seed=2025)

# Individual noise types (returns noise, not corrupted signal)
shift_noise = cn.inject_level_shift(signal_std, noise_level=3)
spike_noise = cn.inject_exp_spike(signal_std, noise_level=3)
impulse_noise = cn.inject_impulse(signal_std, noise_level=3)
gaussian_noise = cn.inject_gaussian(signal_std, noise_level=3)

# Non-overlapping (all types with exclude masks)
# The returned mapping includes a 'missing_mask' entry, summed below.
all_results = cn.inject_all_noise(signal_std, noise_level=3)

# Non-zero entries mark the samples where each noise component is active
print("Shift noise non-zero points:", np.count_nonzero(shift_noise))
print("Spike noise non-zero points:", np.count_nonzero(spike_noise))
print("Impulse noise non-zero points:", np.count_nonzero(impulse_noise))
print("Missing mask True points:", all_results['missing_mask'].sum())

In [None]:
# Plot each isolated noise component on its own panel, filled towards the zero line
noise_panels = [
    (shift_noise, 'Level Shift', '#F44336'),
    (spike_noise, 'Exponential Spike', '#FF9800'),
    (impulse_noise, 'Impulse', '#9C27B0'),
    (gaussian_noise, 'Gaussian', '#4CAF50'),
]

fig, axes = plt.subplots(2, 2, figsize=(16, 6), sharex=True)

# axes.flat walks the 2x2 grid row by row, matching the panel order above
for ax, (nd, name, color) in zip(axes.flat, noise_panels):
    steps = range(len(nd))
    ax.fill_between(steps, nd, 0, alpha=0.5, color=color)
    ax.plot(nd, color=color, linewidth=0.3)
    # Dashed baseline marks zero noise
    ax.axhline(y=0, color='gray', linewidth=0.5, linestyle='--')
    ax.set_title(f'{name} Noise (isolated)', fontweight='bold')

# Only the bottom row needs an x-label because the x-axis is shared
for bottom_ax in axes[-1]:
    bottom_ax.set_xlabel('Time Step')

fig.suptitle('Isolated Noise Components (Standardized Space)', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

## 6. Custom Parameters

You can customize frequency, duration, amplitude, and Gaussian sigma for each severity level.

In [None]:
# Custom parameters: override the level-shift and Gaussian settings for levels 1-3
custom_level_shift = {
    1: {'freq': 0.005, 'dur': 10, 'amp': 0.001},
    2: {'freq': 0.010, 'dur': 15, 'amp': 0.0005},
    3: {'freq': 0.015, 'dur': 20, 'amp': 0.0001},
}
custom_gaussian = {
    1: {'sigma': 0.2},
    2: {'sigma': 0.5},
    3: {'sigma': 1.0},
}

cn_custom = CollectiveNoise(
    seed=2025,
    level_shift_args=custom_level_shift,
    gaussian_args=custom_gaussian,
    min_sigma=0.01,
)

# Only levels 1-3 are defined above; level 2 is used for the demo
out_custom = cn_custom.corrupt(signal, noise_level=2)

fig, ax = plt.subplots(figsize=(16, 4))
ax.plot(signal, color='#2196F3', linewidth=0.5, alpha=0.5, label='Original')
ax.plot(
    out_custom['combined'],
    color='#E91E63',
    linewidth=0.5,
    alpha=0.8,
    label='Custom Combined (Level 2)',
)
ax.legend()
ax.set_xlabel('Time Step')
ax.set_title('Custom Parameters', fontweight='bold')
plt.tight_layout()
plt.show()