In [None]:
#  load synthetic telemetry and create engineered features
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from src.generate_data import generate_synthetic_telemetry
from src.preprocess import normalize_data

# Generate the telemetry frame and run it through normalize_data
df = generate_synthetic_telemetry()
df_norm = normalize_data(df)

# CPU statistics over a 20-sample rolling window (one shared window object)
cpu_window = df['cpu'].rolling(window=20)
df['cpu_rolling_mean'] = cpu_window.mean()
df['cpu_rolling_std'] = cpu_window.std()

# Sample-to-sample latency change and 10-sample rolling error total
df['latency_diff'] = df['latency'].diff()
error_window = df['errors'].rolling(window=10)
df['errors_rolling_sum'] = error_window.sum()

# Visualize: one stacked panel per engineered column
fig, ax = plt.subplots(3, 1, figsize=(12, 8))
panels = (
    ('cpu_rolling_mean', 'Rolling Mean (CPU)', {}),
    ('cpu_rolling_std', 'Rolling Std (CPU)', {'color': 'orange'}),
    ('errors_rolling_sum', 'Rolling Error Count', {'color': 'red'}),
)
for axis, (column, title, style) in zip(ax, panels):
    df[column].plot(ax=axis, title=title, **style)
plt.tight_layout()
plt.show()


### Advanced Feature Engineering

This section includes more advanced transformations for capturing cross-modal dynamics and temporal patterns:

- **FFT Mean**: Averages the magnitude of the latency frequency spectrum over sliding windows, summarizing how much oscillatory (periodic) content each window contains.
- **Z-Scores**: Measures how far a value deviates from the overall mean, in units of standard deviation — useful for flagging outliers relative to the global distribution.
- **Lag Features**: Shifts variables forward in time so that each row also carries earlier values, capturing temporal dependencies (e.g., cause-effect relationships).
- **Rolling Correlation**: Tracks how strongly two signals are correlated over time.


In [None]:
from scipy.fft import fft
from scipy.stats import zscore

# FFT over latency — compute mean magnitude of frequency spectrum over sliding windows
def compute_fft_feature(series, window=64, step=1):
    """Return the mean FFT magnitude of each sliding window of *series*.

    For every window start ``i`` in ``0, step, 2*step, ...`` for which a
    full window of ``window`` samples exists, the FFT of
    ``series[i:i + window]`` is taken, the magnitudes of the first
    ``window // 2`` frequency bins are averaged, and the result is stored
    at position ``i`` of the output. The value at position ``i`` therefore
    summarizes samples ``i .. i + window - 1``.

    Args:
        series: 1-D sequence of numeric samples (pandas Series, list or
            array).
        window: Number of samples per FFT window.
        step: Distance between consecutive window starts.

    Returns:
        A pandas Series with the same length as *series*. Positions that
        do not start a full window (the tail, and skipped positions when
        ``step > 1``) are NaN. When *series* is a pandas Series its index
        is reused, so assigning the result to a DataFrame column aligns
        row-for-row.
    """
    samples = np.asarray(series, dtype=float)
    n_samples = len(samples)
    half = window // 2
    feature = np.full(n_samples, np.nan)

    # "+ 1" so the final full window (starting at n_samples - window) is
    # included; without it a series of exactly `window` samples yields
    # no feature at all.
    for start in range(0, n_samples - window + 1, step):
        magnitudes = np.abs(fft(samples[start:start + window]))[:half]
        # Store at the window's own start so step > 1 stays aligned.
        feature[start] = np.mean(magnitudes)

    index = series.index if isinstance(series, pd.Series) else None
    return pd.Series(feature, index=index)

# Sliding-window FFT summary of the latency signal
latency = df['latency']
df['latency_fft_mean'] = compute_fft_feature(latency)

# Z-scores against the global distribution of each metric (NaNs ignored)
z_targets = {'cpu_z': 'cpu', 'latency_z': 'latency'}
for z_column, metric in z_targets.items():
    df[z_column] = zscore(df[metric], nan_policy='omit')

# Lag features: each row receives the value seen `lag` rows earlier
lag_targets = {
    'cpu_lag1': ('cpu', 1),
    'latency_lag3': ('latency', 3),
    'errors_lag2': ('errors', 2),
}
for lag_column, (metric, lag) in lag_targets.items():
    df[lag_column] = df[metric].shift(lag)

# Rolling correlation between latency and CPU (window=20)
latency_window = df['latency'].rolling(window=20)
df['latency_cpu_corr_20'] = latency_window.corr(df['cpu'])


### Feature Visualizations

We plot selected engineered features to visually inspect how they might help with anomaly detection:

- FFT highlights bursts or irregular patterns in latency.
- Rolling correlation breakdowns can signal functional decoupling between metrics.
- Z-scores flag values that are far from the norm.


In [None]:
# Three stacked panels, one per engineered feature under inspection
fig, ax = plt.subplots(3, 1, figsize=(12, 8))

panels = (
    ('latency_fft_mean', 'Latency FFT Mean', {}),
    ('latency_cpu_corr_20', 'Rolling Correlation: Latency vs CPU', {}),
    ('cpu_z', 'Z-Score of CPU Usage', {'color': 'green'}),
)
for axis, (column, title, style) in zip(ax, panels):
    df[column].plot(ax=axis, title=title, **style)

plt.tight_layout()
plt.show()
