# Colab / Remote Setup
If you are running this in Google Colab, run the cell below to mount your Drive, install dependencies, and set up the environment.
If running locally, you can skip the installation steps or adjust as needed.

In [None]:
import os
import sys

# Detect Colab
try:
    import google.colab
    IN_COLAB = True
    print("Running in Google Colab.")
except ImportError:
    IN_COLAB = False
    print("Running locally.")

if IN_COLAB:
    try:
        from google.colab import drive
        # Mount Drive
        drive.mount('/content/drive')
    except Exception as e:
        print(f"Warning: Could not mount Google Drive: {e}")
        IN_COLAB = False

    if IN_COLAB:
        # EDIT THIS PATH: Point to where 'Sequence' repo is located in your Drive
        REPO_PATH = "/content/drive/MyDrive/Sequence"

        if not os.path.exists(REPO_PATH):
            # Optional: Clone if needed
            # !git clone https://github.com/crichalchemist/Sequence.git $REPO_PATH
            print(f"Warning: {REPO_PATH} does not exist. Please adjust the path.")
        else:
            os.chdir(REPO_PATH)
            print(f"Changed working directory to {os.getcwd()}")
        
        # Install dependencies
        # We assume requirements.txt is in the repo root
        !pip install -r requirements.txt
        !pip install deepspeed


Running in Google Colab.


ValueError: mount failed

# Phase 0: Single-GPU Per-Pair Training

This notebook runs *phase zero* single-GPU training for one or more pairs using the existing `run/training_pipeline.py` components. It:

- Prepares data for each pair
- Builds the hybrid agent model
- Optionally wraps training in DeepSpeed **stage 0** (no sharding) on a single GPU
- Trains and evaluates per pair, saving checkpoints

> Tip: Use a fresh Python environment with the repo requirements installed. GPU is optional but recommended.

In [None]:
import os
import sys
from pathlib import Path

import torch

# Locate the project ROOT (works for scripts, notebooks, Colab, and VS Code)
try:
    # Script execution: ROOT is two levels up from this file's resolved path
    ROOT = Path(__file__).resolve().parents[1]
except NameError:
    # Notebook execution: __file__ is undefined, so look for the 'pairs.csv'
    # marker in the working directory first and then in its parent.
    cwd = Path.cwd()
    marked_dir = next(
        (candidate for candidate in (cwd, cwd.parent) if (candidate / "pairs.csv").exists()),
        None,
    )
    if marked_dir is not None:
        ROOT = marked_dir
    elif cwd.name == "notebooks":
        # Fallback: standard layout /Sequence/notebooks -> /Sequence
        ROOT = cwd.parent
    else:
        ROOT = cwd

# Put ROOT and then ROOT/run at the front of sys.path (run ends up first)
for import_dir in (str(ROOT), str(ROOT / "run")):
    if import_dir not in sys.path:
        sys.path.insert(0, import_dir)

print(f"Project ROOT: {ROOT}")

cuda_ok = torch.cuda.is_available()
DEVICE = "cuda" if cuda_ok else "cpu"
print(f"Torch version: {torch.__version__}")
print(f"CUDA available: {cuda_ok}")
if not cuda_ok:
    print("Falling back to CPU. Training will be slower.")
else:
    print(f"CUDA device count: {torch.cuda.device_count()}")
    print(f"Using device: {torch.cuda.get_device_name(0)}")


NameError: name '__file__' is not defined

In [None]:
from copy import deepcopy
from types import SimpleNamespace

from data.prepare_dataset import process_pair

# Baseline data-preparation settings; build_loaders_for_pair copies these and
# layers per-call overrides on top (edit as needed).
data_prep_defaults = {
    "input_root": ROOT / "output_central",  # location of Central-time zips/CSVs
    "years": None,  # e.g., "2022,2023" to restrict
    "t_in": 120,
    "t_out": 10,
    "target_type": "classification",
    "flat_threshold": 0.0001,
    "train_ratio": 0.7,
    "val_ratio": 0.15,
    "feature_groups": "all",
    "exclude_feature_groups": None,
    "sma_windows": "10,20,50",
    "ema_windows": "10,20,50",
    "rsi_window": 14,
    "bollinger_window": 20,
    "bollinger_num_std": 2.0,
    "atr_window": 14,
    "short_vol_window": 10,
    "long_vol_window": 50,
    "spread_windows": "20",
    "imbalance_smoothing": 5,
    "intrinsic_time": False,
    "dc_threshold_up": 0.001,
    "dc_threshold_down": None,
    "lookahead_window": None,
    "top_k": 3,
    "predict_sell_now": False,
    "include_sentiment": False,
}


def build_loaders_for_pair(pair: str, overrides: dict | None = None):
    """Build the data loaders for ``pair`` and report the feature width.

    A deep copy of ``data_prep_defaults`` is updated with ``overrides`` (when
    given and non-empty), wrapped in a ``SimpleNamespace`` together with
    ``pairs=pair``, and handed to ``process_pair``.

    Args:
        pair: Pair identifier forwarded to ``process_pair``.
        overrides: Optional settings that replace entries of the defaults.

    Returns:
        ``(pair_name, loaders, num_features)`` where ``pair_name`` and
        ``loaders`` come from ``process_pair`` and ``num_features`` is the
        size of the last dimension of the first element of the first
        ``loaders["train"]`` batch.
    """
    settings = deepcopy(data_prep_defaults)
    settings.update(overrides or {})

    namespace = SimpleNamespace(pairs=pair, **settings)
    pair_name, loaders = process_pair(pair, namespace)

    # Peek at one training batch; its first element is assumed to be the
    # feature tensor -- TODO confirm against process_pair.
    first_batch = next(iter(loaders["train"]))
    feature_tensor = first_batch[0]
    return pair_name, loaders, feature_tensor.shape[-1]


# Smoke test: change the condition to True to load "gbpusd" and print the
# shapes of one training batch.
if False:
    sample_pair, sample_loaders, sample_feats = build_loaders_for_pair("gbpusd")
    first_batch = next(iter(sample_loaders["train"]))
    xb, yb = first_batch
    target_shapes = {name: tensor.shape for name, tensor in yb.items()}
    print(sample_pair, xb.shape, target_shapes)

In [None]:
# Optional: DeepSpeed Stage 0 (no sharding) for single-GPU runs.
# If deepspeed is unavailable, the notebook will fall back to standard PyTorch.

deepspeed_available = False
try:
    import deepspeed  # type: ignore

    deepspeed_available = True
except ImportError:
    deepspeed = None  # type: ignore


def make_deepspeed_engine(model, optimizer, lr_scheduler=None, train_batch_size: int | None = None, grad_clip: float | None = None):
    if not deepspeed_available:
        return None, optimizer, lr_scheduler

    ds_config = {
        "train_batch_size": train_batch_size,
        "zero_optimization": {"stage": 0},  # Phase zero
        "gradient_accumulation_steps": 1,
        "fp16": {"enabled": False},
        "bf16": {"enabled": False},
    }
    
    # Add gradient clipping to DeepSpeed config if specified
    if grad_clip is not None:
        ds_config["gradient_clipping"] = grad_clip
    
    engine, optimizer, _, lr_scheduler = deepspeed.initialize(
        model=model,
        model_parameters=model.parameters(),
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
        config=ds_config,
    )
    return engine, optimizer, lr_scheduler


print(f"DeepSpeed available: {deepspeed_available}")


In [None]:
# task_type and the make_model factory are both looked up by train_pair.
# task_type follows the data-prep "target_type" setting and falls back to
# "classification" when that key is absent.
task_type = data_prep_defaults.get("target_type", "classification")


def make_model(num_features: int):
    """Create a ``HybridAgentModel`` for inputs with ``num_features`` features.

    The model is built with 3 classes, a lookahead window of 10 and the
    notebook-level ``DEVICE``.
    """
    # Imported inside the function, so the import is resolved at call time
    from models.agent_hybrid import HybridAgentModel

    model_kwargs = {
        "num_features": num_features,
        "num_classes": 3,  # Prediction classes
        "lookahead_window": 10,
        "device": DEVICE,
    }
    return HybridAgentModel(**model_kwargs)

In [None]:
from eval.agent_eval import evaluate_model
from risk.risk_manager import RiskManager
from train.core.agent_train import train_model, _compute_losses, _select_outputs, _to_device

# All phase-0 checkpoints are written below this directory
ckpt_root = ROOT / "checkpoints" / "phase0"
ckpt_root.mkdir(parents=True, exist_ok=True)


def train_pair(pair: str, use_deepspeed: bool = False):
    """Train and evaluate a single pair, optionally under DeepSpeed stage 0.

    When DeepSpeed is not requested, not installed, or no engine is returned,
    training is delegated to ``train_model``. Otherwise a manual loop drives
    the DeepSpeed engine, validating after every epoch and saving a DeepSpeed
    checkpoint tagged with the pair name at the end.

    Args:
        pair: Pair identifier passed to ``build_loaders_for_pair``.
        use_deepspeed: Request the DeepSpeed path; has no effect when
            DeepSpeed is unavailable.

    Returns:
        ``(history, eval_metrics)``. On the fallback path ``history`` is
        whatever ``train_model`` returns; on the DeepSpeed path it is a dict
        with per-epoch ``train_loss``, ``val_loss`` and ``val_metric`` lists.
        ``eval_metrics`` is the result of ``evaluate_model`` on the test
        loader.
    """
    pair_name, loaders, num_features = build_loaders_for_pair(pair)
    model = make_model(num_features)
    ckpt_path = ckpt_root / f"{pair_name}_best.pt"
    train_cfg = make_train_config(ckpt_path)
    risk_cfg = getattr(train_cfg, "risk", None)
    risk_manager = RiskManager(risk_cfg) if risk_cfg and risk_cfg.enabled else None

    ds_engine = None
    if use_deepspeed and deepspeed_available:
        # The optimizer is only consumed by the DeepSpeed engine; train_model
        # is not handed one, so it is not built on the fallback path.
        optimizer = torch.optim.AdamW(model.parameters(), lr=train_cfg.learning_rate, weight_decay=train_cfg.weight_decay)
        ds_engine, optimizer, _ = make_deepspeed_engine(
            model=model,
            optimizer=optimizer,
            lr_scheduler=None,
            train_batch_size=train_cfg.batch_size,
            grad_clip=train_cfg.grad_clip,  # Pass grad_clip to DeepSpeed config
        )

    # Fallback: use the existing high-level trainer if DeepSpeed is off
    if ds_engine is None:
        history = train_model(
            model,
            loaders["train"],
            loaders["val"],
            train_cfg,
            task_type=task_type,
            risk_manager=risk_manager,
        )
        eval_metrics = evaluate_model(model, loaders["test"], task_type=task_type, risk_manager=risk_manager)
        print(f"Eval ({pair_name}): {eval_metrics}")
        return history, eval_metrics

    # Custom loop with DeepSpeed stage 0
    device = torch.device(DEVICE)
    history = {"train_loss": [], "val_loss": [], "val_metric": []}
    # Use the interval stored on the config by make_train_config; a falsy
    # value (0 / None) disables step logging instead of raising.
    log_interval = train_cfg.log_every

    for epoch in range(1, train_cfg.epochs + 1):
        # Re-enter training mode every epoch: the per-epoch validation below
        # may leave the wrapped module in eval mode (assumption about
        # evaluate_model -- calling train() again is harmless either way).
        ds_engine.train()
        running_loss = 0.0
        for step, batch in enumerate(loaders["train"], start=1):
            x, y = _to_device(batch, device)
            outputs, _ = ds_engine(x)
            logits = _select_outputs(outputs, task_type)
            outputs = dict(outputs)
            outputs["primary"] = logits
            loss, _ = _compute_losses(outputs, y, train_cfg, task_type)
            ds_engine.backward(loss)
            # Note: gradient clipping is handled by DeepSpeed config
            ds_engine.step()
            ds_engine.zero_grad()
            running_loss += loss.item()
            if log_interval and step % log_interval == 0:
                print(f"epoch {epoch} step {step} loss {running_loss / step:.4f}")

        train_epoch_loss = running_loss / max(1, len(loaders["train"]))
        # evaluate_model returns a dict, not a tuple
        val_metrics = evaluate_model(ds_engine.module, loaders["val"], task_type=task_type, risk_manager=risk_manager)
        val_loss = val_metrics.get("loss", 0.0)
        val_metric = val_metrics.get("accuracy", val_metrics.get("mse", 0.0))
        history["train_loss"].append(train_epoch_loss)
        history["val_loss"].append(val_loss)
        history["val_metric"].append(val_metric)
        print(f"epoch {epoch}/{train_cfg.epochs} train_loss {train_epoch_loss:.4f} val_loss {val_loss:.4f} val_metric {val_metric:.4f}")

    # Final eval and checkpointing
    eval_metrics = evaluate_model(ds_engine.module, loaders["test"], task_type=task_type, risk_manager=risk_manager)
    print(f"Eval ({pair_name}): {eval_metrics}")
    ds_engine.save_checkpoint(str(ckpt_root), tag=f"{pair_name}")
    return history, eval_metrics


print("Trainer ready. Call train_pair('eurusd') to start.")


In [None]:
# Training hyperparameters, read as module-level globals by make_train_config
# and train_pair at call time.
batch_size = 64
epochs = 5
learning_rate = 1e-3
weight_decay = 0.0
grad_clip = 1.0
log_every = 50  # Logging frequency


def make_train_config(ckpt_path: Path):
    """Build a ``TrainingConfig`` from the notebook-level hyperparameters.

    Args:
        ckpt_path: Checkpoint location; stored on the config as a string.

    Returns:
        The config, with ``grad_clip`` and ``log_every`` attached as
        attributes after construction.
    """
    constructor_kwargs = {
        "batch_size": batch_size,
        "epochs": epochs,
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "device": DEVICE,
        "checkpoint_path": str(ckpt_path),
    }
    cfg = TrainingConfig(**constructor_kwargs)
    # These two are set after construction rather than passed to the constructor
    for attr_name, attr_value in (("grad_clip", grad_clip), ("log_every", log_every)):
        setattr(cfg, attr_name, attr_value)
    return cfg

In [None]:
from train.core.training_config import TrainingConfig
# Training/eval hyperparameters (module-level globals; edit before training)
pairs_to_run = ["eurusd", "gbpusd"]  # edit list
batch_size = 64
epochs = 5
learning_rate = 1e-3
weight_decay = 0.0
num_workers = 4
pin_memory = torch.cuda.is_available()
grad_clip = 1.0
log_every = 50


def make_train_config(ckpt_path: Path):
    """Build a ``TrainingConfig`` from the notebook-level hyperparameters.

    Args:
        ckpt_path: Checkpoint location; stored on the config as a string.

    Returns:
        The config, with ``grad_clip`` and ``log_every`` attached as
        attributes after construction.
    """
    constructor_kwargs = {
        "batch_size": batch_size,
        "epochs": epochs,
        "learning_rate": learning_rate,
        "weight_decay": weight_decay,
        "device": DEVICE,
        "checkpoint_path": str(ckpt_path),
    }
    cfg = TrainingConfig(**constructor_kwargs)
    # These two are set after construction rather than passed to the constructor
    for attr_name, attr_value in (("grad_clip", grad_clip), ("log_every", log_every)):
        setattr(cfg, attr_name, attr_value)
    return cfg


print("Hyperparameters configured. Edit pairs_to_run and epochs as needed.")

In [None]:
from eval.agent_eval import evaluate_model
from risk.risk_manager import RiskManager
from train.core.agent_train import train_model, _compute_losses, _select_outputs, _to_device

# All phase-0 checkpoints are written below this directory
ckpt_root = ROOT / "checkpoints" / "phase0"
ckpt_root.mkdir(parents=True, exist_ok=True)


def train_pair(pair: str, use_deepspeed: bool = False):
    """Train and evaluate a single pair, optionally under DeepSpeed stage 0.

    When DeepSpeed is not requested, not installed, or no engine is returned,
    training is delegated to ``train_model``. Otherwise a manual loop drives
    the DeepSpeed engine, validating after every epoch and saving a DeepSpeed
    checkpoint tagged with the pair name at the end.

    Args:
        pair: Pair identifier passed to ``build_loaders_for_pair``.
        use_deepspeed: Request the DeepSpeed path; has no effect when
            DeepSpeed is unavailable.

    Returns:
        ``(history, eval_metrics)``. On the fallback path ``history`` is
        whatever ``train_model`` returns; on the DeepSpeed path it is a dict
        with per-epoch ``train_loss``, ``val_loss`` and ``val_metric`` lists.
        ``eval_metrics`` is the result of ``evaluate_model`` on the test
        loader.
    """
    pair_name, loaders, num_features = build_loaders_for_pair(pair)
    model = make_model(num_features)
    ckpt_path = ckpt_root / f"{pair_name}_best.pt"
    train_cfg = make_train_config(ckpt_path)
    risk_cfg = getattr(train_cfg, "risk", None)
    risk_manager = RiskManager(risk_cfg) if risk_cfg and risk_cfg.enabled else None

    ds_engine = None
    if use_deepspeed and deepspeed_available:
        # The optimizer is only consumed by the DeepSpeed engine; train_model
        # is not handed one, so it is not built on the fallback path.
        optimizer = torch.optim.AdamW(model.parameters(), lr=train_cfg.learning_rate, weight_decay=train_cfg.weight_decay)
        ds_engine, optimizer, _ = make_deepspeed_engine(
            model=model,
            optimizer=optimizer,
            lr_scheduler=None,
            train_batch_size=train_cfg.batch_size,
            # Clipping is delegated to the DeepSpeed config rather than being
            # applied by hand between backward() and step().
            grad_clip=train_cfg.grad_clip,
        )

    # Fallback: use the existing high-level trainer if DeepSpeed is off
    if ds_engine is None:
        history = train_model(
            model,
            loaders["train"],
            loaders["val"],
            train_cfg,
            task_type=task_type,
            risk_manager=risk_manager,
        )
        eval_metrics = evaluate_model(model, loaders["test"], task_type=task_type, risk_manager=risk_manager)
        print(f"Eval ({pair_name}): {eval_metrics}")
        return history, eval_metrics

    # Custom loop with DeepSpeed stage 0
    device = torch.device(DEVICE)
    history = {"train_loss": [], "val_loss": [], "val_metric": []}
    # Use the interval stored on the config by make_train_config; a falsy
    # value (0 / None) disables step logging instead of raising.
    log_interval = train_cfg.log_every

    for epoch in range(1, train_cfg.epochs + 1):
        # Re-enter training mode every epoch: the per-epoch validation below
        # may leave the wrapped module in eval mode (assumption about
        # evaluate_model -- calling train() again is harmless either way).
        ds_engine.train()
        running_loss = 0.0
        for step, batch in enumerate(loaders["train"], start=1):
            x, y = _to_device(batch, device)
            outputs, _ = ds_engine(x)
            logits = _select_outputs(outputs, task_type)
            outputs = dict(outputs)
            outputs["primary"] = logits
            loss, _ = _compute_losses(outputs, y, train_cfg, task_type)
            ds_engine.backward(loss)
            # Gradient clipping is handled by the DeepSpeed config (see above)
            ds_engine.step()
            ds_engine.zero_grad()
            running_loss += loss.item()
            if log_interval and step % log_interval == 0:
                print(f"epoch {epoch} step {step} loss {running_loss / step:.4f}")

        train_epoch_loss = running_loss / max(1, len(loaders["train"]))
        # evaluate_model's result is used as a metrics dict everywhere else in
        # this notebook, so read the values by key instead of tuple-unpacking.
        val_metrics = evaluate_model(ds_engine.module, loaders["val"], task_type=task_type, risk_manager=risk_manager)
        val_loss = val_metrics.get("loss", 0.0)
        val_metric = val_metrics.get("accuracy", val_metrics.get("mse", 0.0))
        history["train_loss"].append(train_epoch_loss)
        history["val_loss"].append(val_loss)
        history["val_metric"].append(val_metric)
        print(f"epoch {epoch}/{train_cfg.epochs} train_loss {train_epoch_loss:.4f} val_loss {val_loss:.4f} val_metric {val_metric:.4f}")

    # Final eval and checkpointing
    eval_metrics = evaluate_model(ds_engine.module, loaders["test"], task_type=task_type, risk_manager=risk_manager)
    print(f"Eval ({pair_name}): {eval_metrics}")
    ds_engine.save_checkpoint(str(ckpt_root), tag=f"{pair_name}")
    return history, eval_metrics


print("Trainer ready. Call train_pair('eurusd') to start.")

In [None]:
# Set to True to request stage-0 DeepSpeed wrapping (requires deepspeed to be installed)
run_with_deepspeed = False

# Train every configured pair in order, keeping its history and test metrics
results = {}
for pair in pairs_to_run:
    print(f"\n=== Training {pair} on {DEVICE} (DeepSpeed={run_with_deepspeed}) ===")
    history, metrics = train_pair(pair, use_deepspeed=run_with_deepspeed)
    results[pair] = dict(history=history, metrics=metrics)

# Bare expression so the notebook displays the collected results
results