# 02 — Deep Baseline (Conv1D on ECG windows)

This notebook:
1. Builds beat-centered ECG windows. The demo below uses synthetic placeholder windows; building real windows requires the `00_preview_data` raw arrays or the cached raw Parquet files.
2. Trains a tiny Conv1D with a single two-output regression head (SBP, DBP).
3. Evaluates with subject-wise splits and plots.

> Keep it light: this is a **baseline**, not a tuned SOTA model.

In [None]:
# Optional installs (uncomment to run locally; %pip installs into the active kernel's environment)
# %pip install torch numpy pandas matplotlib scikit-learn scipy pyarrow

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn
from sklearn.model_selection import GroupKFold
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.stats import pearsonr

# Data locations, given relative to the directory the notebook is run from.
DATA_RAW = Path('data/raw')
DATA_INTERIM = Path('data/interim')

# Load the per-beat table, order it by (subject, beat), and renumber the index
# 0..n-1 so positional indices used later line up with row order.
beat_df = (
    pd.read_parquet(DATA_INTERIM / 'beat_table.parquet')
    .sort_values(['subject_id', 'beat_idx'])
    .reset_index(drop=True)
)

## 1) Build beat-centered windows (demo)
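This demo creates synthetic windows as placeholders, because raw ECG is not cached in this notebook. Since the inputs are pure noise, the metrics computed below only confirm that the pipeline runs end to end; they say nothing about model quality. For real training, replace the placeholders with ECG windows sliced from your cached raw Parquet files.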

In [None]:
# Window geometry: `window_sec` seconds at `fs` samples per second gives
# `win` samples per beat window (4.0 * 100 -> 400).
fs = 100
window_sec = 4.0
win = int(window_sec * fs)

# Placeholder inputs: zero-mean Gaussian noise (std 0.05), one row per beat.
# They only exercise the pipeline; real use would slice ECG around each beat.
rng = np.random.default_rng(0)
X = rng.normal(loc=0, scale=0.05, size=(len(beat_df), win)).astype(np.float32)

# Targets: column 0 is SBP, column 1 is DBP.
y = np.column_stack(
    (beat_df['SBP'].values, beat_df['DBP'].values)
).astype(np.float32)

# Subject identifier per beat, used as the grouping key for subject-wise splits.
groups = beat_df['subject_id'].values

## 2) Tiny Conv1D model

In [None]:
class TinyConv1D(nn.Module):
    """Very small 1-D convolutional regressor.

    Two strided Conv1d+ReLU stages are followed by global average pooling,
    then a two-layer MLP that emits ``out_dim`` values per input window.

    Args:
        in_ch: Number of input channels fed to the first convolution.
        out_dim: Number of regression outputs (2 by default).
    """

    def __init__(self, in_ch=1, out_dim=2):
        super().__init__()
        # Layers are created in the same order as the attribute layout
        # (net first, then head) so state_dict keys stay net.0/net.2/head.1/head.3.
        feature_layers = [
            nn.Conv1d(in_ch, 8, kernel_size=5, stride=2, padding=2),
            nn.ReLU(),
            nn.Conv1d(8, 16, kernel_size=5, stride=2, padding=2),
            nn.ReLU(),
            # Collapse the time axis to length 1, so any window length is accepted.
            nn.AdaptiveAvgPool1d(1),
        ]
        regressor_layers = [
            nn.Flatten(),
            nn.Linear(16, 32),
            nn.ReLU(),
            nn.Linear(32, out_dim),
        ]
        self.net = nn.Sequential(*feature_layers)
        self.head = nn.Sequential(*regressor_layers)

    def forward(self, x):
        """Map a batch shaped (batch, in_ch, length) to predictions shaped (batch, out_dim)."""
        pooled = self.net(x)
        return self.head(pooled)

# L1 (mean absolute error) loss; the cross-validation loop reuses this object.
criterion = nn.L1Loss()

# Stand-alone model/optimizer pair for ad-hoc experimentation. The
# cross-validation loop constructs its own fresh model and optimizer per fold.
model = TinyConv1D()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

## 3) Subject-wise CV training (one full-batch epoch per fold, for speed)

In [None]:
N_SPLITS = 5    # number of subject-wise folds
N_EPOCHS = 1    # full-batch passes per fold; kept tiny so the demo runs quickly
CV_SEED = 0     # seeds torch so weight initialisation is reproducible


def _fold_metrics(y_true, y_pred):
    """Return ``(MAE, RMSE, Pearson r)`` for one target column of one fold.

    Args:
        y_true: 1-D array of reference values.
        y_pred: 1-D array of predictions, same length as ``y_true``.
    """
    mae = mean_absolute_error(y_true, y_pred)
    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6;
    # taking the square root explicitly works on every version.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r = pearsonr(y_true, y_pred)[0]
    return mae, rmse, r


# Without a seed every Restart & Run All yields different weights and metrics.
torch.manual_seed(CV_SEED)

gkf = GroupKFold(n_splits=N_SPLITS)
all_metrics = []
# Conv1d expects (batch, channels, length): insert a singleton channel axis.
X_t = torch.from_numpy(X)[:, None, :]
y_t = torch.from_numpy(y)

# GroupKFold keeps all beats of a subject in the same fold, so validation
# subjects are never seen during training.
for split, (train_idx, val_idx) in enumerate(gkf.split(X, y, groups), 1):
    # Fresh model and optimizer per fold so no weights leak between folds.
    fold_model = TinyConv1D()
    fold_opt = torch.optim.Adam(fold_model.parameters(), lr=1e-3)

    # Training: each "epoch" is a single full-batch gradient step.
    fold_model.train()
    for _ in range(N_EPOCHS):
        fold_opt.zero_grad()
        loss = criterion(fold_model(X_t[train_idx]), y_t[train_idx])
        loss.backward()
        fold_opt.step()

    # Evaluation on the held-out subjects.
    fold_model.eval()
    with torch.no_grad():
        val_pred = fold_model(X_t[val_idx]).numpy()

    sbp_metrics = _fold_metrics(y[val_idx, 0], val_pred[:, 0])
    dbp_metrics = _fold_metrics(y[val_idx, 1], val_pred[:, 1])
    all_metrics.append((split, *sbp_metrics, *dbp_metrics))

cv_metrics = pd.DataFrame(
    all_metrics,
    columns=['Split', 'MAE_SBP', 'RMSE_SBP', 'r_SBP', 'MAE_DBP', 'RMSE_DBP', 'r_DBP'],
)
cv_metrics

## 4) Example plot

In [None]:
N_BEATS_SHOWN = 200  # number of leading rows of `y` to display

# Explicit Axes interface, with labels and a legend so the two target series
# can be told apart.
fig, ax = plt.subplots()
ax.plot(y[:N_BEATS_SHOWN, 0], label='SBP')
ax.plot(y[:N_BEATS_SHOWN, 1], label='DBP')
ax.set_title(f'Example: SBP/DBP values (first {N_BEATS_SHOWN} beats)')
ax.set_xlabel('Beat index')
ax.set_ylabel('mmHg')
ax.legend()
plt.show()