# Cardiometabolic Risk: SSL Pretraining on Colab

**Phase 5**: Train a self-supervised PPG encoder on 4,133 signals using Colab T4 GPU

**Expected runtime**: 8–12 hours (50 epochs)  
**Output**: Pretrained encoder checkpoint + training metrics

**Prerequisites**:
- Data uploaded to Google Drive: `/MyDrive/cardiometabolic-risk-colab/data/processed/`
- GitHub repo exists and is public

---

## Setup: Mount Drive & Clone Repo

In [1]:
# Mount Google Drive at /content/drive.
from pathlib import Path

from google.colab import drive

drive.mount('/content/drive')

# Project folder on Drive; later cells build the data and checkpoint paths from it.
COLAB_DRIVE_PATH = Path('/content/drive/MyDrive/cardiometabolic-risk-colab')

# The reported flag is whether the project folder exists, not the mount status itself.
project_folder_found = COLAB_DRIVE_PATH.exists()
print(f"‚úÖ Drive mounted: {project_folder_found}")

Mounted at /content/drive
‚úÖ Drive mounted: True


In [2]:
import subprocess
import os

# The checkout lives on Drive, at the same location as the project folder.
repo_dir = Path('/content/drive/MyDrive/cardiometabolic-risk-colab')
repo_url = "https://github.com/Yendoh-Derek/Cardiometabolic-Risk-System-for-Wearables.git"

# Decide on the presence of `.git`, not on the folder itself: the data upload
# listed in the prerequisites creates this folder before any clone has happened,
# which previously made this cell report "already present" and skip the clone.
has_checkout = (repo_dir / ".git").exists()
# git can clone into a missing or an empty folder, but not into a populated one.
can_clone_here = not repo_dir.exists() or not any(repo_dir.iterdir())

if has_checkout:
    print(f"‚úÖ Repo already present: {repo_dir}")
elif can_clone_here:
    print("Cloning repository...")
    subprocess.run(["git", "clone", "--depth", "1", repo_url, str(repo_dir)], check=True)
    print(f"‚úÖ Repo cloned: {repo_dir}")
else:
    # Best effort: warn instead of failing, in case the code was copied in by hand.
    print(f"WARNING: {repo_dir} exists but is not a git checkout; clone skipped.")
    print("         If the repository code is missing, move the existing contents "
          "aside (git cannot clone into a non-empty folder) and re-run this cell.")

# Later cells use repo-relative paths (requirements.txt, logs/, artifacts/).
os.chdir(repo_dir)

‚úÖ Repo already present: /content/drive/MyDrive/cardiometabolic-risk-colab


## Install Dependencies

In [3]:
# Install the dependencies listed in requirements.txt (the path is relative to
# the current working directory); -q keeps pip's output short.
!pip install -r requirements.txt -q
# Printed unconditionally: this line does not check whether pip succeeded.
print("‚úÖ Dependencies installed")

[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m86.4/86.4 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m91.2/91.2 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m40.1/40.1 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m163.8/163.8 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m12.4/12.4 MB[0m [31m142.4 MB/s[0m eta [36m0:00:00[0m

In [4]:
# Install exact pandas and wfdb versions (this runs after the requirements.txt
# install, so these pins are applied last), then let `pip check` report any
# installed packages whose declared dependencies are not satisfied.
!pip install pandas==2.2.2 wfdb==4.1.2 -q
!pip check

[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/12.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ[0m[91m‚ï∏[0m[90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.5/12.7 MB[0m [31m47.0 MB/s[0m eta [36m0:00:01[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[91m‚ï∏[0m[90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m10.5/12.7 MB[0m [31m198.9 MB/s[0m eta [36m0:00:01[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[91m‚ï∏[0m [32m12.7/12.7 MB[0m [31m262.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m12.7/12.7 MB[0m [31m1

## Verify GPU & Imports

In [5]:
# Check GPU
!nvidia-smi --query-gpu=name --format=csv,noheader

import torch
print(f"\n‚úÖ GPU available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"   Device: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

Tesla T4

‚úÖ GPU available: True
   Device: Tesla T4
   Memory: 15.8 GB


In [6]:
import sys

# The imports below are written as `colab_src.<...>`, so the folder that
# CONTAINS the package (the repo root) has to be importable. Previously only
# `<repo>/colab_src` was added, and the imports succeeded solely because an
# earlier cell had chdir'd into the repo root.
# `<repo>/colab_src` is still added, at the front as before, in case modules
# inside the package import each other by top-level name (not verifiable from
# this notebook). The membership test avoids duplicates when the cell is re-run.
for import_root in (repo_dir, repo_dir / "colab_src"):
    if str(import_root) not in sys.path:
        sys.path.insert(0, str(import_root))

from colab_src.models.ssl.config import SSLConfig
from colab_src.models.ssl.encoder import ResNetEncoder
from colab_src.models.ssl.decoder import ResNetDecoder
from colab_src.models.ssl.losses import SSLLoss

print("‚úÖ All imports successful")

# Load config from an absolute path so the cell does not depend on the current
# working directory; this is the same file the training cell passes via --config.
cfg = SSLConfig.from_yaml(str(repo_dir / "configs/ssl_pretraining.yaml"))
print(f"‚úÖ Config loaded")

‚úÖ All imports successful
‚úÖ Config loaded


## Verify Data Integrity

In [None]:
import pandas as pd
import numpy as np

# Expected layout of the Phase 5A window array: one row per window and
# 1,250 samples per row (10 s at 125 Hz, as the message printed below states).
EXPECTED_WINDOW_SHAPE = (653716, 1250)

# `drive_data` is where the data was uploaded on Drive; `data_dir` is the
# location inside the repo checkout and is what the training cell passes as
# --data-dir. Both used to be built from COLAB_DRIVE_PATH, which made the
# linking step below unreachable. With the repo itself on Drive the two paths
# still coincide and no link is created; the link only matters if the checkout
# is ever moved elsewhere.
drive_data = COLAB_DRIVE_PATH / "data" / "processed"
data_dir = repo_dir / "data" / "processed"

# If in Colab, symlink Drive data to repo structure (best effort: a failure is
# reported here and then surfaces as missing files in the check below).
try:
    if drive_data.exists() and not data_dir.exists():
        print(f"Linking Drive data: {drive_data} ‚Üí {data_dir}")
        data_dir.parent.mkdir(parents=True, exist_ok=True)
        data_dir.symlink_to(drive_data, target_is_directory=True)
except OSError as e:
    print(f"Warning: {e}")

# Files that make up the Phase 5A windowed dataset (653,716 x 1,250 samples).
required_files = {
    "mimic_windows_metadata.parquet": "Window metadata (653K rows √ó train/val split)",
    "mimic_windows.npy": "Window signal array [653716, 1250]",
    "denoised_signal_index.json": "Signal index mapping (source signal ID ‚Üí window rows)",
}

print("üîç Checking Phase 5A windowed data integrity...\n")
all_present = True

for fname, description in required_files.items():
    fpath = data_dir / fname
    if fpath.exists():
        if fpath.is_dir():
            # Directory entries are summarised by how many .npy files they hold.
            count = len(list(fpath.glob("*.npy")))
            print(f"‚úÖ {fname:40s} ({count:5d} files) ‚Äî {description}")
        else:
            size_mb = fpath.stat().st_size / 1e6
            print(f"‚úÖ {fname:40s} ({size_mb:6.1f} MB) ‚Äî {description}")
    else:
        print(f"‚ùå {fname:40s} NOT FOUND ‚Äî {description}")
        all_present = False

if not all_present:
    print("\n‚ö†Ô∏è  MISSING PHASE 5A DATA")
    print("\nTo generate windows from denoised signals:")
    print("  1. Run: colab_src/data_pipeline/generate_mimic_windows.py")
    print("  2. Input: data/processed/denoised_signal_index.json")
    print("  3. Output: data/processed/mimic_windows.npy + mimic_windows_metadata.parquet")
    print("\nOr upload pre-generated data to Google Drive:")
    print(f"  Path: /MyDrive/cardiometabolic-risk-colab/data/processed/")
    raise FileNotFoundError("Phase 5A window data not found")

# Verify Phase 5A window metadata
windows_meta = pd.read_parquet(data_dir / "mimic_windows_metadata.parquet")
print(f"\n‚úÖ Window metadata: {len(windows_meta)} total rows")
print(f"   Columns: {list(windows_meta.columns)}")
if 'split' in windows_meta.columns:
    print(f"   Train/Val split: {windows_meta['split'].value_counts().to_dict()}")

# Check window signals (memory-mapped so the full array is never read into RAM)
windows_path = data_dir / "mimic_windows.npy"
windows_memmap = np.load(windows_path, mmap_mode='r')
print(f"\n‚úÖ Window signals: shape {windows_memmap.shape}")
# Explicit raise rather than `assert`: assertions are skipped under `python -O`.
if windows_memmap.shape != EXPECTED_WINDOW_SHAPE:
    raise ValueError(
        f"Shape mismatch! Expected {EXPECTED_WINDOW_SHAPE}, got {windows_memmap.shape}"
    )
print(f"   Signal format: 653716 windows √ó 1250 samples per window (10 sec @ 125 Hz)")
print(f"   Memory usage (mmap): {windows_path.stat().st_size / 1e9:.2f} GB")

print("\n" + "="*70)
print("‚úÖ PHASE 5A WINDOWED DATA READY FOR TRAINING")
print("="*70)

üîç Checking data integrity...

‚úÖ ssl_pretraining_data.parquet             (   0.2 MB) ‚Äî Training metadata
‚úÖ ssl_validation_data.parquet              (   0.0 MB) ‚Äî Validation metadata
‚úÖ denoised_signals                         ( 4420 files) ‚Äî Ground truth signals (denoised)

‚úÖ Training dataset: 4133 samples
‚úÖ Validation dataset: 200 samples

‚úÖ ALL DATA READY FOR TRAINING


## Progress Tracking Setup
Monitor training progress with real-time metrics and ETA estimation

In [None]:
# Progress-tracking helpers: the tracker object used by the training cell and
# the live-monitoring function used by the monitoring cell.
from colab_src.utils.progress_tracker import TrainingProgressTracker, monitor_training_live

# Tracker settings; output_dir is relative to the current working directory.
tracker_settings = {
    "output_dir": "logs/training",
    "name": "SSL Pretraining Phase 5B",
}
tracker = TrainingProgressTracker(**tracker_settings)

### Temporary

In [8]:
import json
from pathlib import Path

repo_dir = Path("/content/drive/MyDrive/cardiometabolic-risk-colab")

# Summarise the recorded loss history, if one has been written.
history_file = repo_dir / "logs/ssl/training_history.json"
if not history_file.exists():
    print("‚ùå Training history not found")
else:
    with open(history_file) as fh:
        history = json.load(fh)
    print(f"‚úÖ Training history found!")
    train_curve = history['train_loss']
    print(f"   Epochs completed: {len(train_curve)}")
    best_val = min(history['val_loss'])
    print(f"   Best val loss: {best_val:.4f}")
    print(f"   Final train loss: {train_curve[-1]:.4f}")

# Report whether the best-model checkpoint file is on disk.
checkpoint_file = repo_dir / "checkpoints/ssl/best_model.pt"
checkpoint_status = (
    f"‚úÖ Best model checkpoint saved: {checkpoint_file}"
    if checkpoint_file.exists()
    else "‚ùå Checkpoint not found"
)
print(checkpoint_status)

‚úÖ Training history found!
   Epochs completed: 23
   Best val loss: 0.2626
   Final train loss: 0.2748
‚úÖ Best model checkpoint saved: /content/drive/MyDrive/cardiometabolic-risk-colab/checkpoints/ssl/best_model.pt


In [9]:
import json
from pathlib import Path


def recent_epochs(train_losses, val_losses, count=5):
    """Return the last `count` epochs as (epoch, train_loss, val_loss) tuples.

    Epoch numbers are 1-based. The two lists are paired from the first epoch,
    so if one list is shorter the surplus entries of the other are ignored.
    Fewer than `count` tuples are returned when fewer epochs exist; the
    previous inline code assumed at least five and mis-numbered shorter runs.
    """
    paired = list(zip(train_losses, val_losses))
    tail = paired[-count:] if count > 0 else []
    first_epoch = len(paired) - len(tail) + 1
    return [(first_epoch + offset, train, val) for offset, (train, val) in enumerate(tail)]


repo_dir = Path("/content/drive/MyDrive/cardiometabolic-risk-colab")
history_file = repo_dir / "logs/ssl/training_history.json"

# Guard the read so a missing file yields a message instead of a traceback.
if history_file.exists():
    with open(history_file) as f:
        history = json.load(f)

    print(f"Training stopped at epoch {len(history['train_loss'])}")
    print(f"Early stopping likely triggered (patience exhausted)")
    print(f"\nValidation loss by epoch:")
    for epoch, train, val in recent_epochs(history['train_loss'], history['val_loss']):
        print(f"  Epoch {epoch}: train={train:.4f}, val={val:.4f}")
else:
    print(f"Training history not found: {history_file}")

Training stopped at epoch 23
Early stopping likely triggered (patience exhausted)

Validation loss by epoch:
  Epoch 19: train=0.2771, val=0.2630
  Epoch 20: train=0.2774, val=0.2630
  Epoch 21: train=0.2769, val=0.2631
  Epoch 22: train=0.2742, val=0.2630
  Epoch 23: train=0.2748, val=0.2632


## Phase 5: Run Full Training (50 Epochs)

In [13]:
# Make sure the checkpoint folder exists on Drive before the run starts.
# NOTE(review): the training cell passes `checkpoints/ssl` under the repo as
# --checkpoint-dir, i.e. a subfolder of the directory announced here.
checkpoint_dir = COLAB_DRIVE_PATH / "checkpoints"
checkpoint_dir.mkdir(parents=True, exist_ok=True)

# Fixed run summary, printed one entry per line.
divider = "=" * 70
run_summary = [
    f"üìÅ Checkpoints will be saved to:",
    f"   {checkpoint_dir}",
    f"\n‚è±Ô∏è  Estimated duration: 8‚Äì12 hours",
    f"üíæ Batch size: 8 (with 4√ó accumulation = eff. 32)",
    f"üî¢ Epochs: 50",
    f"üìä Training samples: 4,133",
    "\n" + divider,
    "Starting training...",
    divider,
]
for summary_line in run_summary:
    print(summary_line)

üìÅ Checkpoints will be saved to:
   /content/drive/MyDrive/cardiometabolic-risk-colab/checkpoints

‚è±Ô∏è  Estimated duration: 8‚Äì12 hours
üíæ Batch size: 8 (with 4√ó accumulation = eff. 32)
üî¢ Epochs: 50
üìä Training samples: 4,133

Starting training...


In [None]:
# Launch the SSL training module in a child interpreter and stream its log.
import os

tracker.start()

cmd = [
    sys.executable,
    "-m",
    "colab_src.models.ssl.train",
    "--config", str(repo_dir / "configs/ssl_pretraining.yaml"),
    "--data-dir", str(data_dir),
    "--epochs", "50",
    "--log-dir", str(repo_dir / "logs/training"),
    "--checkpoint-dir", str(repo_dir / "checkpoints/ssl"),
]

print(f"üöÄ Starting Phase 5B training...\n")
print(f"Command: {' '.join(cmd)}\n")
print("=" * 70)

# Stream the child's output line by line as it is produced. Capturing it and
# printing after exit meant a multi-hour run showed nothing until it finished
# (and nothing at all if the runtime disconnected first); only stderr was
# printed, so anything written to stdout was lost. stderr is merged into stdout
# here so both appear in order.
child_env = {**os.environ, "PYTHONUNBUFFERED": "1"}  # stop the child buffering output when piped
with subprocess.Popen(
    cmd,
    cwd=str(repo_dir),
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
    bufsize=1,  # line-buffered on the reading side
    env=child_env,
) as proc:
    for log_line in proc.stdout:
        print(log_line, end="")
# Leaving the `with` block waits for the child, so the exit code is now set.
returncode = proc.returncode

if returncode == 0:
    print("=" * 70)
    print("\n‚úÖ Training completed successfully!")
    tracker.summary()
else:
    print(f"\n‚ùå Training failed with exit code: {returncode}")
    print("See output above for error details")
    sys.exit(1)

üöÄ Starting Phase 5 training...

Command: /usr/bin/python3 -m colab_src.models.ssl.train --config /content/drive/MyDrive/cardiometabolic-risk-colab/configs/ssl_pretraining.yaml --data-dir /content/drive/MyDrive/cardiometabolic-risk-colab/data/processed --epochs 50

2026-01-13 08:49:23,457 - __main__ - INFO - Loading config from /content/drive/MyDrive/cardiometabolic-risk-colab/configs/ssl_pretraining.yaml
2026-01-13 08:49:23,797 - __main__ - INFO - ‚úÖ Auto-detected GPU: Tesla T4
2026-01-13 08:49:23,798 - __main__ - INFO - Data directory overridden: /content/drive/MyDrive/cardiometabolic-risk-colab/data/processed
2026-01-13 08:49:23,798 - __main__ - INFO - 
2026-01-13 08:49:23,798 - __main__ - INFO - Configuration:
2026-01-13 08:49:23,798 - __main__ - INFO -   Device:              cuda
2026-01-13 08:49:23,798 - __main__ - INFO -   Data dir:            /content/drive/MyDrive/cardiometabolic-risk-colab/data/processed
2026-01-13 08:49:23,798 - __main__ - INFO -   Epochs:              50

### Temporary diagnostic cells

In [15]:
# Update the Drive checkout to the latest commit on origin/main.
# NOTE: modules already imported in this kernel are not reloaded by a pull; the
# training cell starts a fresh interpreter, so it does pick up the new code.
import subprocess
import os
from pathlib import Path

repo_dir = Path('/content/drive/MyDrive/cardiometabolic-risk-colab')

print("Pulling latest changes from GitHub...")
pull_cmd = ["git", "pull", "origin", "main"]
result = subprocess.run(pull_cmd, cwd=str(repo_dir), capture_output=True, text=True)

# git's stdout is always shown; stderr only when the pull failed.
print(result.stdout)
pull_succeeded = result.returncode == 0
print("‚úÖ Code updated successfully!" if pull_succeeded else f"‚ö†Ô∏è  Error: {result.stderr}")

Pulling latest changes from GitHub...
Updating c2dda54..b3de6c5
Fast-forward
 configs/ssl_pretraining.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

‚úÖ Code updated successfully!


## Real-Time Progress Monitoring
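Monitor training while it runs by reading the live progress file. Execute this during training; note that the training cell blocks this notebook's kernel until the run finishes, so the monitor has to be started from a separate session.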

In [None]:
# Poll the training history file and refresh the live view at a fixed interval.
live_history_path = "logs/training/training_history.json"  # relative to the current working directory
refresh_seconds = 5
monitor_training_live(history_file=live_history_path, update_interval=refresh_seconds)

## Validate & Visualize Results

In [None]:
import json
import matplotlib.pyplot as plt
import numpy as np

# Load training metrics from progress tracker
history_file = Path("logs/training/training_history.json")
figure_path = Path("artifacts/training_analysis.png")

# Read the per-epoch loss lists. Missing or empty lists fall back to [] so the
# plotting code below never indexes into an empty history.
train_loss, val_loss = [], []
if history_file.exists():
    with open(history_file) as f:
        history = json.load(f)
    train_loss = history.get('train_loss') or []
    val_loss = history.get('val_loss') or []

if train_loss:
    # Each curve gets its own x-axis, so a val_loss list whose length differs
    # from train_loss still plots instead of raising a shape error.
    epochs = range(1, len(train_loss) + 1)
    val_epochs = range(1, len(val_loss) + 1)

    # Create comprehensive visualization
    fig = plt.figure(figsize=(16, 10))
    gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)

    # 1. Main loss curves
    ax1 = fig.add_subplot(gs[0, :2])
    ax1.plot(epochs, train_loss, 'o-', linewidth=2, markersize=4, label='Train Loss')
    if val_loss:
        ax1.plot(val_epochs, val_loss, 's-', linewidth=2, markersize=4, label='Val Loss')
        best_idx = val_loss.index(min(val_loss))
        ax1.axvline(best_idx + 1, color='green', linestyle='--', alpha=0.5, label=f'Best @ Epoch {best_idx + 1}')
    ax1.set_xlabel('Epoch', fontsize=11)
    ax1.set_ylabel('Loss', fontsize=11)
    ax1.set_title('Training & Validation Loss', fontsize=13, fontweight='bold')
    ax1.legend()
    ax1.grid(alpha=0.3)

    # 2. Loss improvement relative to the first epoch (percent)
    ax2 = fig.add_subplot(gs[0, 2])
    initial_loss = train_loss[0]
    if initial_loss:
        improvements = [(initial_loss - l) / initial_loss * 100 for l in train_loss]
    else:
        # A zero starting loss has no meaningful relative improvement.
        improvements = [0.0] * len(train_loss)
    ax2.plot(epochs, improvements, 'o-', color='green', linewidth=2, markersize=4)
    ax2.set_xlabel('Epoch', fontsize=11)
    ax2.set_ylabel('Improvement (%)', fontsize=11)
    ax2.set_title('Loss Improvement', fontsize=13, fontweight='bold')
    ax2.grid(alpha=0.3)

    # 3. Loss per epoch (bar chart)
    ax3 = fig.add_subplot(gs[1, 0])
    ax3.bar(epochs, train_loss, alpha=0.7, color='steelblue')
    ax3.set_xlabel('Epoch', fontsize=11)
    ax3.set_ylabel('Train Loss', fontsize=11)
    ax3.set_title('Train Loss per Epoch', fontsize=13, fontweight='bold')
    ax3.grid(alpha=0.3, axis='y')

    # 4. Validation loss per epoch
    if val_loss:
        ax4 = fig.add_subplot(gs[1, 1])
        ax4.bar(val_epochs, val_loss, alpha=0.7, color='coral')
        best_val = min(val_loss)
        ax4.axhline(best_val, color='green', linestyle='--', alpha=0.5, linewidth=2)
        ax4.set_xlabel('Epoch', fontsize=11)
        ax4.set_ylabel('Val Loss', fontsize=11)
        ax4.set_title('Validation Loss per Epoch', fontsize=13, fontweight='bold')
        ax4.grid(alpha=0.3, axis='y')

    # 5. Statistics
    ax5 = fig.add_subplot(gs[1, 2])
    ax5.axis('off')
    stats_text = f"""
    üìä TRAINING STATISTICS
    
    Total Epochs: {len(train_loss)}
    Best Epoch: {history.get('best_epoch', 'N/A')}
    Min Train Loss: {min(train_loss):.4f}
    Final Train Loss: {train_loss[-1]:.4f}
    """
    if val_loss:
        stats_text += f"\n    Min Val Loss: {min(val_loss):.4f}\n    Final Val Loss: {val_loss[-1]:.4f}"
    ax5.text(0.1, 0.5, stats_text, fontsize=11, verticalalignment='center', family='monospace')

    # 6. Convergence analysis
    # A 'valid' moving average over `window` epochs yields one value per full
    # window; each value is drawn at the LAST epoch of its window (window..N),
    # so the smoothed curve lines up with the raw points it averages instead of
    # being shifted to the left.
    ax6 = fig.add_subplot(gs[2, :])
    window = 5
    kernel = np.ones(window) / window
    if len(train_loss) >= window:
        smoothed = np.convolve(train_loss, kernel, mode='valid')
        ax6.plot(range(window, len(train_loss) + 1), smoothed, 'b-', linewidth=3, label=f'Smoothed (window={window})')
    ax6.plot(epochs, train_loss, 'o-', linewidth=1, markersize=3, alpha=0.5, label='Raw')
    if val_loss:
        if len(val_loss) >= window:
            smoothed_val = np.convolve(val_loss, kernel, mode='valid')
            ax6.plot(range(window, len(val_loss) + 1), smoothed_val, 'r-', linewidth=3, label='Val Smoothed')
        ax6.plot(val_epochs, val_loss, 's-', linewidth=1, markersize=3, alpha=0.5, label='Val Raw')
    ax6.set_xlabel('Epoch', fontsize=11)
    ax6.set_ylabel('Loss', fontsize=11)
    ax6.set_title('Convergence Analysis (Smoothed Loss Curves)', fontsize=13, fontweight='bold')
    ax6.legend()
    ax6.grid(alpha=0.3)

    # savefig does not create missing folders, so make sure artifacts/ exists.
    figure_path.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(figure_path, dpi=150, bbox_inches='tight')
    print("‚úÖ Training analysis visualization saved to artifacts/training_analysis.png")
    plt.show()
elif history_file.exists():
    print(f"Training history contains no completed epochs yet: {history_file}")
else:
    print(f"‚ö†Ô∏è  Training history not found: {history_file}")
    print("Run training cell first (Cell 20)")

‚ö†Ô∏è  Metrics file not found: /content/drive/MyDrive/cardiometabolic-risk-colab/checkpoints/training_metrics.json


## ✅ Phase 5 Complete

Checkpoints are saved to Google Drive at:
```
/MyDrive/cardiometabolic-risk-colab/checkpoints/ssl/
```

**Next Steps**:
1. Phase 6: Linear probe evaluation
2. Phase 7: Extract embeddings
3. Phase 8: Train XGBoost models

See [README.md](../README.md) for detailed instructions.