# Size Picker Agent V2

**Task**: Pick optimal position size based on candle patterns

**Input**: OHLC candles (normalized to log returns + features) + Equity

**Output**: Two heads predicting for each of 30 sizes:
- P(liquidation) - probability the trade will liquidate at this size
- E[return] - expected return at this size

**Decision Rule**: Among the sizes whose P(liq) is below the threshold, pick the one with the highest expected return

**Goal**: Maximize return without getting liquidated (liquidation = equity falling to 30% of its starting value, i.e. a 70% loss)

## V2 Changes (from expert review)

1. **Two-head model** instead of 30-class classification
2. **Asymmetric loss** - penalize predicting "safe" when the trade actually liquidates
3. **Better normalization** - log returns + candle features (range, body, wicks)
4. **Training stability** - AdamW, gradient clipping, OneCycleLR
5. **Risk-aware metrics** - liquidation rate, regret (not just accuracy)

## Configuration

All tunable parameters in the config cell:
- `EPOCHS`, `BATCH_SIZE`, `LEARNING_RATE`, `HIDDEN_SIZE`
- `GRAD_CLIP`, `WEIGHT_DECAY`, `LIQ_THRESHOLD`
- `DATASET_NAME` - which V2 dataset to train on
- `LOAD_FROM` - checkpoint filename to resume

## Training Data (V2)

Requires datasets with `all_results` field (30 outcomes per trade):
- `balanced_v2_100k` - 33k each of small/mid/big optimal
- Generate using `generate_all_datasets_v2()` in generate_datasets.ipynb

In [None]:
# Setup
import os
import pickle
import random

# Detect Google Colab: the import only succeeds inside a Colab runtime.
try:
    import google.colab  # noqa: F401 - imported only to probe availability
    IN_COLAB = True
except ImportError:
    # Catch only "module not installed". A bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit and hide unrelated failures.
    IN_COLAB = False

# Pick the data directory: Google Drive on Colab, a local folder otherwise.
if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    DATA_DIR = '/content/drive/MyDrive/size_picker_data'
    print(f"Google Drive mounted. Data dir: {DATA_DIR}")
else:
    DATA_DIR = 'data'
    print(f"Running locally. Data dir: {DATA_DIR}")

# Local output folder, created relative to the current working directory.
os.makedirs('models', exist_ok=True)

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt

# GPU Support: start from the CPU and upgrade to CUDA when a device is visible
device = torch.device('cpu')
if torch.cuda.is_available():
    device = torch.device('cuda')

print(f"PyTorch version: {torch.__version__}")
print(f"Device: {device}")
if device.type == 'cuda':
    # Report the name of the first CUDA device
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# =============================================================================
# CONFIGURATION - V2 Parameters
# =============================================================================

# --- Training settings -------------------------------------------------------
EPOCHS = 10000
BATCH_SIZE = 32
LEARNING_RATE = 0.001
GRAD_CLIP = 1.0          # max gradient norm passed to clip_grad_norm_
WEIGHT_DECAY = 0.01      # AdamW weight decay

# --- Model settings ----------------------------------------------------------
HIDDEN_SIZE = 128
NUM_LSTM_LAYERS = 2
DROPOUT = 0.2
INPUT_FEATURES = 5  # log_return, range, body, upper_wick, lower_wick

# --- Decision settings -------------------------------------------------------
LIQ_THRESHOLD = 0.1  # Max acceptable P(liquidation) for size selection

# --- Loss weights ------------------------------------------------------------
LOSS_LIQ_WEIGHT = 1.0
LOSS_RETURN_WEIGHT = 0.1
LOSS_ASYMMETRIC_WEIGHT = 5.0

# --- Data settings -----------------------------------------------------------
DATASET_NAME = 'balanced_v2_100k'  # Requires V2 dataset with all_results
TRAIN_SPLIT = 0.8

# --- Checkpoint settings -----------------------------------------------------
LOAD_FROM = None                # Set to checkpoint filename to resume, or None for fresh
SAVE_EVERY = 100                # Save checkpoint every N epochs

# Echo the settings most often tuned so each run's log records them.
print("V2 Configuration loaded:")
for _setting_name, _setting_value in (
    ("EPOCHS", EPOCHS),
    ("BATCH_SIZE", BATCH_SIZE),
    ("LEARNING_RATE", LEARNING_RATE),
    ("GRAD_CLIP", GRAD_CLIP),
    ("WEIGHT_DECAY", WEIGHT_DECAY),
    ("HIDDEN_SIZE", HIDDEN_SIZE),
    ("NUM_LSTM_LAYERS", NUM_LSTM_LAYERS),
    ("DROPOUT", DROPOUT),
    ("LIQ_THRESHOLD", LIQ_THRESHOLD),
    ("DATASET_NAME", DATASET_NAME),
    ("LOAD_FROM", LOAD_FROM or 'Training from scratch'),
):
    print(f"  {_setting_name}: {_setting_value}")

In [None]:
# Load datasets from Google Drive
def load_dataset(name):
    """Unpickle ``{DATA_DIR}/{name}.pkl`` and return its contents.

    Prints how many trades were loaded.

    NOTE: pickle can execute arbitrary code while loading, so only open
    dataset files from a trusted source.
    """
    with open(f'{DATA_DIR}/{name}.pkl', 'rb') as handle:
        loaded = pickle.load(handle)
    print(f"Loaded {len(loaded)} trades from {name}")
    return loaded

def list_datasets():
    """Print each ``.pkl`` file in DATA_DIR with its trade count.

    Every file is fully unpickled just to measure its length.

    Returns:
        The list of ``.pkl`` filenames, in ``os.listdir`` order.
    """
    pkl_names = [entry for entry in os.listdir(DATA_DIR) if entry.endswith('.pkl')]
    print("Available datasets:")
    for pkl_name in pkl_names:
        with open(f'{DATA_DIR}/{pkl_name}', 'rb') as handle:
            trade_count = len(pickle.load(handle))
        print(f"  {pkl_name}: {trade_count} trades")
    return pkl_names

# List available datasets. This unpickles every .pkl file in DATA_DIR just to
# count its trades, so it can be slow when the directory holds large datasets.
list_datasets()

In [None]:
# Valid sizes and constants
# 30 position sizes in steps of 0.15, rounded so they compare exactly: 0.15 to 4.50
SIZES = [round(step * 0.15, 2) for step in range(1, 31)]
NUM_SIZES = len(SIZES)
LOOKBACK = 24
EQUITY = 1000

print(f"Valid sizes: {SIZES[0]} to {SIZES[-1]} ({NUM_SIZES} sizes)")
print(f"Lookback: {LOOKBACK} candles")

# Categories: three equal-width buckets over the size grid
SMALL_SIZES = list(filter(lambda s: s <= 1.5, SIZES))      # 0.15 to 1.50
MID_SIZES = [s for s in SIZES if s > 1.5 and s <= 3.0]     # 1.65 to 3.00
BIG_SIZES = list(filter(lambda s: s > 3.0, SIZES))         # 3.15 to 4.50

for _bucket_label, _bucket in (("Small", SMALL_SIZES), ("Mid", MID_SIZES), ("Big", BIG_SIZES)):
    print(f"{_bucket_label}: {len(_bucket)} sizes ({_bucket[0]}-{_bucket[-1]})")

In [None]:
# Load training data
all_trades = load_dataset(DATASET_NAME)

# Verify distribution: share of trades whose 'category' is small / mid / big
categories = [trade['category'] for trade in all_trades]
print("\nCategory distribution:")
for cat in ('small', 'mid', 'big'):
    count = sum(1 for label in categories if label == cat)
    print(f"  {cat}: {count} ({count/len(all_trades)*100:.1f}%)")

# Show sample trade structure (first trade only)
print(f"\nSample trade keys: {[*all_trades[0].keys()]}")
print(f"Candle shape: {all_trades[0]['candles'].shape}")

In [None]:
# Helper functions - V2 normalization

def normalize_candles_v2(candles):
    """
    V2 normalization: turn OHLC candles into scale-free per-bar features.

    Columns 0-3 of ``candles`` are read as open, high, low, close. For every
    bar after the first, the output row holds:
    - log return: log(close / previous close) - momentum
    - range: (high - low) / close - volatility
    - body: |close - open| / close - conviction
    - upper wick: (high - max(open, close)) / close - rejection from highs
    - lower wick: (min(open, close) - low) / close - rejection from lows

    Returns: [seq_len-1, 5] array (the first bar has no previous close)
    """
    bars = np.array(candles)
    all_closes = bars[:, 3]

    # Per-bar columns start at the second bar so they line up with the returns
    bar_open, bar_high, bar_low, bar_close = (bars[1:, col] for col in range(4))

    body_top = np.maximum(bar_open, bar_close)
    body_bottom = np.minimum(bar_open, bar_close)

    # Column order: [log_return, range, body, upper_wick, lower_wick]
    feature_columns = (
        np.log(bar_close / all_closes[:-1]),
        (bar_high - bar_low) / bar_close,
        np.abs(bar_close - bar_open) / bar_close,
        (bar_high - body_top) / bar_close,
        (body_bottom - bar_low) / bar_close,
    )
    return np.column_stack(feature_columns)

# Legacy function for backwards compatibility
def normalize_candles(candles):
    """Legacy normalization - percent change relative to the first close price.

    Args:
        candles: 2-D array-like of candles; column 3 of the first row is
            used as the base (close) price.

    Returns:
        Array of the same shape holding ``(value - base) / base * 100`` for
        every entry.
    """
    # Coerce to an array so plain nested lists work too. Tuple indexing
    # (candles[0, 3]) raises TypeError on a list, while normalize_candles_v2
    # already accepts lists.
    candles = np.asarray(candles)
    base = candles[0, 3]
    return (candles - base) / base * 100

# Test V2 normalization on the first loaded trade and show its leading rows
sample = all_trades[0]
candles_v2 = normalize_candles_v2(sample['candles'])
print(f"V2 normalized shape: {candles_v2.shape}")
print("Feature names: [log_return, range, body, upper_wick, lower_wick]")
print("Sample features (first 3 bars):")
for i, row in enumerate(candles_v2[:3]):
    print(f"  Bar {i}: ret={row[0]:.4f}, rng={row[1]:.4f}, body={row[2]:.4f}, uwk={row[3]:.4f}, lwk={row[4]:.4f}")

In [None]:
# Neural network - V2 Two-Head Architecture

class SizePickerV2(nn.Module):
    """
    Two-head LSTM model for position sizing.

    Rather than classifying one of ``num_sizes`` sizes, the model outputs for
    every size:
    - P(liquidation), squashed to [0, 1] by a sigmoid
    - E[return], unbounded

    Decision (see ``pick_size``): among sizes whose P(liq) is below the
    threshold, pick the one with the highest predicted return.
    """

    def __init__(self, input_size=5, hidden_size=128, num_layers=2, dropout=0.2, num_sizes=30):
        super().__init__()
        self.num_sizes = num_sizes
        self.hidden_size = hidden_size

        # Encoder: stacked LSTM; inter-layer dropout is disabled for a single layer
        inter_layer_dropout = dropout if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )

        # Layer normalization of the sequence summary, for stability
        self.layer_norm = nn.LayerNorm(hidden_size)

        # Shared trunk: sequence summary concatenated with the scalar equity input
        trunk_width = 64
        self.shared = nn.Sequential(
            nn.Linear(hidden_size + 1, trunk_width),  # +1 for equity
            nn.ReLU(),
            nn.Dropout(dropout),
        )

        # Head 1: P(liquidation) per size - sigmoid keeps outputs in [0, 1]
        self.liq_head = nn.Sequential(
            nn.Linear(trunk_width, 32),
            nn.ReLU(),
            nn.Linear(32, num_sizes),
            nn.Sigmoid(),
        )

        # Head 2: E[return] per size - no output activation, so unbounded
        self.return_head = nn.Sequential(
            nn.Linear(trunk_width, 32),
            nn.ReLU(),
            nn.Linear(32, num_sizes),
        )

    def forward(self, candles, equity):
        """
        Run the encoder and both heads.

        Args:
            candles: [batch, seq_len, input_size] normalized candle features
            equity: [batch, 1] normalized equity

        Returns:
            p_liq: [batch, num_sizes] P(liquidation) for each size
            e_return: [batch, num_sizes] E[return] for each size
        """
        encoded, _ = self.lstm(candles)

        # Summarize the sequence by the output at the final timestep.
        # NOTE(review): TradeDatasetV2 zero-pads short sequences at the END, so
        # for padded inputs this is the state after the padding steps - confirm
        # that is intended.
        summary = self.layer_norm(encoded[:, -1, :])

        features = self.shared(torch.cat((summary, equity), dim=1))
        return self.liq_head(features), self.return_head(features)

    def pick_size(self, candles, equity, liq_threshold=0.1):
        """
        Choose, per sample, the safe size with the highest predicted return.

        A size counts as safe when its predicted P(liq) is strictly below
        ``liq_threshold``. Runs under ``torch.no_grad()`` but does not switch
        the module to eval mode; callers do that themselves.

        Args:
            candles: [batch, seq_len, input_size]
            equity: [batch, 1]
            liq_threshold: max acceptable P(liquidation)

        Returns:
            size_idx: [batch] index of chosen size
        """
        with torch.no_grad():
            p_liq, e_return = self.forward(candles, equity)

            # Anything not strictly below the threshold is excluded by giving
            # it a -inf return. If no size is safe, every entry is -inf and the
            # result is whatever argmax returns for an all-equal row.
            unsafe = ~(p_liq < liq_threshold)
            idx = e_return.masked_fill(unsafe, float('-inf')).argmax(dim=1)

        return idx


# Create model from the config constants and move it to the selected device
model = SizePickerV2(
    input_size=INPUT_FEATURES,
    hidden_size=HIDDEN_SIZE,
    num_layers=NUM_LSTM_LAYERS,
    dropout=DROPOUT,
    num_sizes=NUM_SIZES,
)
model = model.to(device)

# Count parameters (all, and the subset that receives gradients)
param_counts = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(count for count, _ in param_counts)
trainable_params = sum(count for count, needs_grad in param_counts if needs_grad)

print("Model: SizePickerV2")
print(f"  LSTM({INPUT_FEATURES} -> {HIDDEN_SIZE}, {NUM_LSTM_LAYERS} layers)")
print(f"  Outputs: P(liq)[{NUM_SIZES}] + E[return][{NUM_SIZES}]")
print(f"  Total parameters: {total_params:,}")
print(f"  Trainable parameters: {trainable_params:,}")
print(f"  Device: {device}")

In [None]:
# Training setup - V2 Dataset with all_results

# Longest sequence in the dataset after V2 normalization. The V2 features are
# built from close-to-close returns, so each trade yields one row fewer than
# it has candles.
max_len = max(t['candles'].shape[0] for t in all_trades) - 1
print(f"Max candle sequence length (after V2 norm): {max_len}")

class TradeDatasetV2(Dataset):
    """
    V2 dataset exposing the per-size outcomes (``all_results``) of each trade
    for multi-task learning.

    Each item is a tuple:
        candles: [max_len, INPUT_FEATURES] V2 features, zero-padded at the end
        equity: [1] normalized equity (EQUITY / 1000)
        target: int - index of the trade's optimal size in SIZES
        liq_targets: [NUM_SIZES] 0/1 - did each size liquidate?
        return_targets: [NUM_SIZES] float - return % for each size
    """

    def __init__(self, trades, max_len):
        self.trades = trades
        self.max_len = max_len

        # Only the first trade is inspected for the required field
        first_trade = trades[0]
        if 'all_results' not in first_trade:
            raise ValueError("Dataset missing 'all_results' field. Use V2 datasets from generate_all_datasets_v2()")

    def __len__(self):
        return len(self.trades)

    def __getitem__(self, idx):
        trade = self.trades[idx]

        # V2 features, recomputed on every access
        features = normalize_candles_v2(trade['candles'])

        # Append zero rows until the sequence reaches max_len
        shortfall = self.max_len - features.shape[0]
        if shortfall > 0:
            features = np.vstack([features, np.zeros((shortfall, INPUT_FEATURES))])

        candles_t = torch.FloatTensor(features)
        equity_t = torch.FloatTensor([EQUITY / 1000])  # Normalized equity
        target = SIZES.index(trade['optimal_size'])

        # Per-size targets; slots beyond len(all_results) stay at zero
        liq_targets = torch.zeros(NUM_SIZES)
        return_targets = torch.zeros(NUM_SIZES)
        for slot, outcome in enumerate(trade['all_results']):
            liq_targets[slot] = float(outcome['liquidated'])
            return_targets[slot] = outcome['return_pct']

        return candles_t, equity_t, target, liq_targets, return_targets


# Check if dataset has all_results (only the first trade is inspected)
has_all_results = 'all_results' in all_trades[0]
print(f"Dataset has all_results: {has_all_results}")

if not has_all_results:
    print("\nWARNING: Current dataset missing 'all_results' field!")
    print("You need to regenerate using generate_all_datasets_v2()")
    print("The training loop will fail without V2 datasets.")
else:
    # Verify all_results structure
    sample_results = all_trades[0]['all_results']
    print(f"  all_results length: {len(sample_results)}")
    print(f"  Sample result: {sample_results[0]}")

# Split data
# Shuffle a COPY with a fixed seed so the train/val partition is the same on
# every run. The previous unseeded, in-place random.shuffle drew a new split
# each time the cell ran, so resuming from a checkpoint (LOAD_FROM) validated
# on trades the model had already trained on.
SPLIT_SEED = 42
shuffled_trades = list(all_trades)
random.Random(SPLIT_SEED).shuffle(shuffled_trades)
split_idx = int(len(shuffled_trades) * TRAIN_SPLIT)
train_trades = shuffled_trades[:split_idx]
val_trades = shuffled_trades[split_idx:]

if has_all_results:
    train_dataset = TradeDatasetV2(train_trades, max_len)
    val_dataset = TradeDatasetV2(val_trades, max_len)

    # Training batches are reshuffled every epoch; validation order is fixed
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

    print(f"\nTrain: {len(train_trades)} trades, {len(train_loader)} batches")
    print(f"Val: {len(val_trades)} trades, {len(val_loader)} batches")
else:
    print("\nDataLoaders NOT created - need V2 datasets")

In [None]:
# Training loop - V2 with asymmetric loss

# Loss function
def compute_loss(p_liq, e_return, liq_targets, return_targets):
    """
    Combined loss with an asymmetric, differentiable safety penalty.

    total = LOSS_LIQ_WEIGHT * BCE(p_liq, liq_targets)
          + LOSS_RETURN_WEIGHT * Huber(e_return, return_targets)
          + LOSS_ASYMMETRIC_WEIGHT * mean(BCE restricted to "false safe" cells)

    A cell is "false safe" when the model predicts P(liq) < 0.5 but the size
    actually liquidated. The penalty used to be the mean of a boolean mask,
    which has zero gradient and therefore never influenced training. It is now
    the per-cell BCE on exactly those cells, so it up-weights the gradient that
    pushes P(liq) up where a liquidation was missed. The reported 'asym' value
    is consequently on a different scale than before.

    Args:
        p_liq: [batch, 30] predicted P(liquidation), values in [0, 1]
        e_return: [batch, 30] predicted E[return]
        liq_targets: [batch, 30] actual liquidation (0 or 1)
        return_targets: [batch, 30] actual returns

    Returns:
        total_loss, loss_parts dict (python floats under 'liq', 'ret',
        'asym', 'total')
    """
    # Per-cell BCE, shared by the mean liquidation loss and the penalty
    bce_per_cell = F.binary_cross_entropy(p_liq, liq_targets, reduction='none')
    liq_loss = bce_per_cell.mean()

    # Return prediction loss (Huber for robustness to outliers)
    return_loss = F.huber_loss(e_return, return_targets)

    # The mask only selects which cells get extra weight; the gradient flows
    # through bce_per_cell.
    false_safe = (p_liq < 0.5) & (liq_targets == 1)
    asymmetric_penalty = LOSS_ASYMMETRIC_WEIGHT * (false_safe.float() * bce_per_cell).mean()

    # Weighted combination
    total_loss = (LOSS_LIQ_WEIGHT * liq_loss +
                  LOSS_RETURN_WEIGHT * return_loss +
                  asymmetric_penalty)

    return total_loss, {
        'liq': liq_loss.item(),
        'ret': return_loss.item(),
        'asym': asymmetric_penalty.item(),
        'total': total_loss.item()
    }

# Metrics
def compute_metrics(model, loader, liq_threshold=LIQ_THRESHOLD):
    """
    Compute risk-aware metrics for the sizes the model would pick.

    The model is put in eval mode (and left there). For each trade the size
    chosen by ``model.pick_size`` is scored against the recorded per-size
    outcomes: a chosen size that liquidated counts as a fixed -70 return,
    otherwise its recorded return is used. "Optimal" return is the recorded
    return at the trade's labelled optimal size.

    Args:
        model: object exposing ``eval()`` and ``pick_size(candles, equity, thr)``
        loader: iterable of (candles, equity, targets, liq_targets,
            return_targets) batches
        liq_threshold: max acceptable P(liquidation) passed to pick_size

    Returns:
        dict with liquidation_rate (%), avg_return, optimal_return, regret
        (optimal minus achieved, per trade) and category_acc (%)

    Raises:
        ValueError: if the loader yields no trades (was a ZeroDivisionError).
    """
    model.eval()

    total_trades = 0
    liquidations = 0
    total_return = 0
    optimal_return = 0
    correct_category = 0

    with torch.no_grad():
        for candles, equity, targets, liq_targets, return_targets in loader:
            # Only the model inputs need to be on the device; the targets are
            # just read back on the host. pick_size already runs the forward
            # pass, so no separate model(...) call is needed.
            chosen_idx = model.pick_size(candles.to(device), equity.to(device), liq_threshold)

            # One bulk conversion per tensor instead of several .item() calls
            # (each a device sync) per trade.
            per_trade = zip(
                chosen_idx.tolist(),
                targets.tolist(),
                liq_targets.tolist(),
                return_targets.tolist(),
            )
            for chosen, optimal, liq_row, ret_row in per_trade:
                # Did chosen size liquidate?
                if liq_row[chosen] > 0.5:
                    liquidations += 1
                    actual_ret = -70  # 70% loss on liquidation
                else:
                    actual_ret = ret_row[chosen]

                total_return += actual_ret
                # Optimal return (what we could have gotten)
                optimal_return += ret_row[optimal]
                total_trades += 1

                # Category accuracy (for comparison with V1)
                if get_category(SIZES[chosen]) == get_category(SIZES[optimal]):
                    correct_category += 1

    if total_trades == 0:
        raise ValueError("compute_metrics received an empty loader; no trades to evaluate")

    return {
        'liquidation_rate': liquidations / total_trades * 100,
        'avg_return': total_return / total_trades,
        'optimal_return': optimal_return / total_trades,
        'regret': (optimal_return - total_return) / total_trades,
        'category_acc': correct_category / total_trades * 100
    }

def get_category(size):
    """Bucket a position size: 'small' up to 1.5, 'mid' up to 3.0, else 'big'."""
    # Upper bounds are inclusive and checked in ascending order
    for upper_bound, label in ((1.5, 'small'), (3.0, 'mid')):
        if size <= upper_bound:
            return label
    return 'big'

# Optimizer with weight decay
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)

# OneCycleLR scheduler. It is stepped once per batch in train_epoch, hence the
# schedule length of EPOCHS * len(train_loader) steps.
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=LEARNING_RATE * 10,  # Peak at 10x base LR
    epochs=EPOCHS,
    steps_per_epoch=len(train_loader),
    pct_start=0.1,  # Warmup for 10% of training
    anneal_strategy='cos'
)

# Checkpoint directory
CHECKPOINT_DIR = f'{DATA_DIR}/checkpoints_v2'
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# Defaults for a fresh run; overwritten below when a checkpoint is restored
start_epoch = 0
best_regret = float('inf')
history = {'train_loss': [], 'val_liq_rate': [], 'val_return': [], 'val_regret': [], 'lr': []}

if LOAD_FROM:
    # Treat LOAD_FROM as a full path when it is absolute, otherwise as a
    # filename inside CHECKPOINT_DIR. os.path.isabs also recognises Windows
    # drive/UNC paths, which the old leading-'/' test prefixed with
    # CHECKPOINT_DIR; the startswith check keeps the previous behaviour for
    # '/...' paths on every platform.
    load_is_absolute = os.path.isabs(LOAD_FROM) or LOAD_FROM.startswith('/')
    checkpoint_path = LOAD_FROM if load_is_absolute else f'{CHECKPOINT_DIR}/{LOAD_FROM}'
    if os.path.exists(checkpoint_path):
        print(f"Loading checkpoint: {checkpoint_path}")
        checkpoint = torch.load(checkpoint_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        # Everything except the model weights is optional
        if 'optimizer_state_dict' in checkpoint:
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        if 'scheduler_state_dict' in checkpoint:
            scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        start_epoch = checkpoint.get('epoch', start_epoch)
        history = checkpoint.get('history', history)
        best_regret = checkpoint.get('best_regret', best_regret)
        print(f"  Resumed from epoch {start_epoch}")
    else:
        print(f"Checkpoint not found: {checkpoint_path}, starting fresh")

def train_epoch(model, loader, optimizer, scheduler):
    """Run one training pass over ``loader``.

    Each batch does forward, compute_loss, backward, gradient-norm clipping
    at GRAD_CLIP, then an optimizer step and a scheduler step.

    Returns:
        (mean total loss per batch, dict of mean 'liq' / 'ret' / 'asym'
        loss components per batch)
    """
    model.train()
    running_total = 0
    running_parts = dict.fromkeys(('liq', 'ret', 'asym'), 0)

    # The optimal-size index in each batch is not used for training
    for candles, equity, _targets, liq_targets, return_targets in loader:
        # Move data to the training device
        candles, equity = candles.to(device), equity.to(device)
        liq_targets, return_targets = liq_targets.to(device), return_targets.to(device)

        optimizer.zero_grad()

        p_liq, e_return = model(candles, equity)
        batch_loss, batch_parts = compute_loss(p_liq, e_return, liq_targets, return_targets)
        batch_loss.backward()

        # Gradient clipping for stability
        torch.nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP)

        optimizer.step()
        scheduler.step()

        running_total += batch_loss.item()
        for part_name in running_parts:
            running_parts[part_name] += batch_parts[part_name]

    num_batches = len(loader)
    mean_total = running_total / num_batches
    mean_parts = {part_name: part_sum / num_batches for part_name, part_sum in running_parts.items()}
    return mean_total, mean_parts

def save_checkpoint(model, optimizer, scheduler, epoch, history, metrics, best_regret, filename):
    """Write a resumable checkpoint to ``{CHECKPOINT_DIR}/{filename}``.

    Stores the model, optimizer and scheduler state dicts, the epoch number,
    the history and metrics objects, the best regret so far, and a snapshot of
    the main config constants.
    """
    config_snapshot = {
        'hidden_size': HIDDEN_SIZE,
        'num_layers': NUM_LSTM_LAYERS,
        'dropout': DROPOUT,
        'learning_rate': LEARNING_RATE,
        'batch_size': BATCH_SIZE,
        'liq_threshold': LIQ_THRESHOLD,
        'dataset': DATASET_NAME,
    }
    payload = dict(
        model_state_dict=model.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
        scheduler_state_dict=scheduler.state_dict(),
        epoch=epoch,
        history=history,
        metrics=metrics,
        best_regret=best_regret,
        config=config_snapshot,
    )
    torch.save(payload, f"{CHECKPOINT_DIR}/{filename}")

# Train
print(f"\nV2 Training from epoch {start_epoch + 1} to {EPOCHS}")
print(f"Checkpoints: {CHECKPOINT_DIR}")
print(f"Best regret so far: {best_regret:.2f}%")
print(f"Device: {device}\n")
print(" | ".join(
    f"{title:>{width}}"
    for title, width in (('Epoch', 6), ('Loss', 8), ('Liq%', 6), ('Return', 8),
                         ('Regret', 8), ('CatAcc', 6), ('LR', 10))
))
print("-" * 75)

for epoch in range(start_epoch, EPOCHS):
    epoch_num = epoch + 1  # 1-based number used in logs and checkpoint names

    train_loss, loss_parts = train_epoch(model, train_loader, optimizer, scheduler)
    metrics = compute_metrics(model, val_loader, LIQ_THRESHOLD)
    current_lr = scheduler.get_last_lr()[0]

    # Record this epoch before any checkpoint is written, so saved history includes it
    for series_name, series_value in (
        ('train_loss', train_loss),
        ('val_liq_rate', metrics['liquidation_rate']),
        ('val_return', metrics['avg_return']),
        ('val_regret', metrics['regret']),
        ('lr', current_lr),
    ):
        history[series_name].append(series_value)

    # Check for new best (minimize regret)
    is_best = metrics['regret'] < best_regret
    if is_best:
        best_regret = metrics['regret']
        save_checkpoint(model, optimizer, scheduler, epoch_num, history, metrics, best_regret, 'best.pt')

    # Periodic checkpoint; its "SAVED" marker takes precedence over the best marker
    marker = "*** BEST ***" if is_best else ""
    if epoch_num % SAVE_EVERY == 0:
        save_checkpoint(model, optimizer, scheduler, epoch_num, history, metrics, best_regret, f'epoch_{epoch_num}.pt')
        marker = "SAVED"

    print(f"{epoch_num:6d} | {train_loss:8.4f} | {metrics['liquidation_rate']:5.1f}% | "
          f"{metrics['avg_return']:7.2f}% | {metrics['regret']:7.2f}% | "
          f"{metrics['category_acc']:5.1f}% | {current_lr:.2e} {marker}")

print("\nTraining complete!")
print(f"Best regret: {best_regret:.2f}%")

In [None]:
# Plot V2 training curves: one panel per tracked series, 2 x 2 grid
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
loss_ax, liq_ax = axes[0, 0], axes[0, 1]
ret_ax, lr_ax = axes[1, 0], axes[1, 1]

# Loss (top-left)
loss_ax.plot(history['train_loss'], label='Train Loss')
loss_ax.set_title('Training Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

# Liquidation Rate (top-right)
# NOTE(review): the reference line is hard-coded at 10 and does not follow
# LIQ_THRESHOLD - confirm whether it should.
liq_ax.plot(history['val_liq_rate'], label='Liq Rate %', color='red')
liq_ax.set_title('Validation Liquidation Rate (lower is better)')
liq_ax.set_xlabel('Epoch')
liq_ax.set_ylabel('Liquidation Rate %')
liq_ax.axhline(y=10, color='gray', linestyle='--', alpha=0.5, label='10% target')
liq_ax.legend()

# Return and Regret (bottom-left)
ret_ax.plot(history['val_return'], label='Avg Return %', color='green')
ret_ax.plot(history['val_regret'], label='Regret %', color='orange')
ret_ax.set_title('Return vs Regret (higher return, lower regret is better)')
ret_ax.set_xlabel('Epoch')
ret_ax.set_ylabel('%')
ret_ax.legend()

# Learning Rate (bottom-right, log scale)
lr_ax.plot(history['lr'], label='Learning Rate', color='purple')
lr_ax.set_title('Learning Rate Schedule (OneCycleLR)')
lr_ax.set_xlabel('Epoch')
lr_ax.set_ylabel('LR')
lr_ax.set_yscale('log')
lr_ax.legend()

ax = lr_ax  # leave `ax` bound to the last panel, as before

plt.tight_layout()
plt.show()

# Final metrics, recomputed on the validation set with the current weights
metrics = compute_metrics(model, val_loader, LIQ_THRESHOLD)
print("\nFinal Validation Metrics:")
for _metric_label, _metric_key, _metric_spec in (
    ('Liquidation Rate', 'liquidation_rate', '.1f'),
    ('Average Return', 'avg_return', '.2f'),
    ('Optimal Return', 'optimal_return', '.2f'),
    ('Regret', 'regret', '.2f'),
    ('Category Accuracy', 'category_acc', '.1f'),
):
    print(f"  {_metric_label}: {metrics[_metric_key]:{_metric_spec}}%")

In [None]:
# Hidden State Analysis - V2
# What patterns is the LSTM learning?

from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
from sklearn.metrics.pairwise import cosine_similarity

def extract_hidden_states_v2(model, trades, max_len, max_samples=2000):
    """Collect the layer-normalized final LSTM output for up to ``max_samples`` trades.

    Mirrors the encoder part of the model's forward pass (LSTM, last
    timestep, layer norm), one trade at a time, with the same end-of-sequence
    zero padding to ``max_len``. The model is put in eval mode.

    Returns:
        (array with one hidden-state row per trade, list of each trade's
        'category' in the same order)
    """
    model.eval()

    # Cap the workload at the first max_samples trades
    sample_trades = trades
    if len(trades) > max_samples:
        sample_trades = trades[:max_samples]

    vectors, labels = [], []
    with torch.no_grad():
        for trade in sample_trades:
            # V2 normalization, then zero-pad at the end up to max_len
            features = normalize_candles_v2(trade['candles'])
            shortfall = max_len - features.shape[0]
            if shortfall > 0:
                features = np.vstack([features, np.zeros((shortfall, INPUT_FEATURES))])

            batch_of_one = torch.FloatTensor(features).unsqueeze(0).to(device)

            # Hidden state as seen by the heads' shared trunk (before equity is appended)
            lstm_out, _ = model.lstm(batch_of_one)
            summary = model.layer_norm(lstm_out[:, -1, :])

            vectors.append(summary[0].cpu().numpy())
            labels.append(trade['category'])

    return np.array(vectors), labels

# Extract hidden states from validation set
print("Extracting V2 hidden states...")
hidden_states, categories = extract_hidden_states_v2(model, val_trades, max_len)
print(f"Extracted {len(hidden_states)} hidden states, shape: {hidden_states.shape}")

# Integer code per category, used as cluster labels for the silhouette score
cat_to_num = dict(small=0, mid=1, big=2)
cat_labels = list(map(cat_to_num.__getitem__, categories))

In [None]:
# PCA Visualization - do categories cluster separately?
pca = PCA(n_components=2)
hidden_2d = pca.fit_transform(hidden_states)

fig, ax = plt.subplots(figsize=(10, 8))

# One scatter per category, drawn in small -> mid -> big order
colors = {'small': 'blue', 'mid': 'green', 'big': 'red'}
category_array = np.array(categories)
for cat, color in colors.items():
    mask = category_array == cat
    points = hidden_2d[mask]
    ax.scatter(points[:, 0], points[:, 1], c=color, label=cat, alpha=0.5, s=20)

pc1_pct, pc2_pct = pca.explained_variance_ratio_[:2] * 100
ax.set_xlabel(f'PC1 ({pc1_pct:.1f}% variance)')
ax.set_ylabel(f'PC2 ({pc2_pct:.1f}% variance)')
ax.set_title('Hidden State Clustering\n(Do small/mid/big separate?)')
ax.legend()
plt.tight_layout()
plt.show()

# Silhouette score - quantifies cluster quality on the full hidden states
# (not the 2-D projection): -1 = wrong clusters, 0 = overlapping, 1 = perfect separation
sil_score = silhouette_score(hidden_states, cat_labels)
print(f"\nSilhouette Score: {sil_score:.3f}")
for _interpretation in (
    "  -1 to 0: Categories overlap (model not distinguishing)",
    "  0 to 0.5: Some separation",
    "  0.5 to 1: Good separation (model learning distinct patterns)",
):
    print(_interpretation)

In [None]:
# Pattern Similarity - are same-category trades more similar internally?
# Pairwise cosine similarity between the extracted hidden states. Rows and
# columns follow the order of `hidden_states`, and therefore of `categories`.
sim_matrix = cosine_similarity(hidden_states)

# Calculate average similarity within and across categories
def avg_similarity(sim_matrix, categories, cat1, cat2):
    """Return the mean pairwise similarity between members of two categories.

    Args:
        sim_matrix: Square array-like where entry [i, j] is the similarity
            between item i and item j.
        categories: Sequence of category names, one per row/column of
            ``sim_matrix``.
        cat1: Category selecting the rows.
        cat2: Category selecting the columns.

    Returns:
        The mean of the selected entries. When ``cat1 == cat2`` the diagonal
        (each item compared with itself) is excluded, because self-similarity
        would otherwise inflate the within-category average. Returns 0 when
        either category has no members or when there are no distinct pairs
        to average (a same-category query with a single member).
    """
    idx1 = [i for i, c in enumerate(categories) if c == cat1]
    idx2 = [i for i, c in enumerate(categories) if c == cat2]
    if not idx1 or not idx2:
        return 0

    sub_matrix = np.asarray(sim_matrix)[np.ix_(idx1, idx2)]
    if cat1 != cat2:
        return sub_matrix.mean()

    # Same category: idx1 == idx2, so the diagonal holds self-pairs. Drop them.
    n_members = len(idx1)
    if n_members < 2:
        return 0
    off_diagonal = ~np.eye(n_members, dtype=bool)
    return sub_matrix[off_diagonal].mean()

# Within-category similarity (should be HIGH)
small_small = avg_similarity(sim_matrix, categories, 'small', 'small')
mid_mid = avg_similarity(sim_matrix, categories, 'mid', 'mid')
big_big = avg_similarity(sim_matrix, categories, 'big', 'big')

# Cross-category similarity (should be LOWER)
small_mid = avg_similarity(sim_matrix, categories, 'small', 'mid')
small_big = avg_similarity(sim_matrix, categories, 'small', 'big')
mid_big = avg_similarity(sim_matrix, categories, 'mid', 'big')

# Summary averages, computed once and shared by the bar chart and the verdict
within_avg = np.mean([small_small, mid_mid, big_big])
cross_avg = np.mean([small_mid, small_big, mid_big])

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Bar chart
ax = axes[0]
labels = ['Small-Small', 'Mid-Mid', 'Big-Big', 'Small-Mid', 'Small-Big', 'Mid-Big']
values = [small_small, mid_mid, big_big, small_mid, small_big, mid_big]
colors = ['blue', 'green', 'red', 'purple', 'purple', 'purple']
bars = ax.bar(labels, values, color=colors)
ax.set_ylabel('Average Cosine Similarity')
ax.set_title('Pattern Similarity\n(Same-category should be higher than cross-category)')
ax.axhline(y=cross_avg, color='gray', linestyle='--', alpha=0.5, label='Avg cross')
# Rotate the tick labels of the bar chart explicitly. plt.xticks() would act on
# the *current* axes, which after plt.subplots(1, 2) is the last one created
# (the heatmap), leaving these long labels unrotated.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
# Without a legend the 'Avg cross' label on the reference line is never shown.
ax.legend()
for bar, val in zip(bars, values):
    ax.text(bar.get_x() + bar.get_width()/2, val + 0.01, f'{val:.2f}', ha='center', fontsize=9)

# Heatmap: symmetric 3x3 matrix of the same six averages
ax = axes[1]
sim_avg = np.array([
    [small_small, small_mid, small_big],
    [small_mid, mid_mid, mid_big],
    [small_big, mid_big, big_big]
])
im = ax.imshow(sim_avg, cmap='viridis', vmin=0, vmax=1)
ax.set_xticks([0, 1, 2])
ax.set_yticks([0, 1, 2])
ax.set_xticklabels(['Small', 'Mid', 'Big'])
ax.set_yticklabels(['Small', 'Mid', 'Big'])
ax.set_title('Category Similarity Matrix')
for i in range(3):
    for j in range(3):
        ax.text(j, i, f'{sim_avg[i,j]:.2f}', ha='center', va='center', color='white', fontsize=12)
plt.colorbar(im, ax=ax)

plt.tight_layout()
plt.show()

# Interpretation: within-category similarity must beat cross-category by a
# 0.05 margin to count as "learning category-specific patterns"
print(f"\nWithin-category avg similarity: {within_avg:.3f}")
print(f"Cross-category avg similarity: {cross_avg:.3f}")
print(f"Difference: {within_avg - cross_avg:.3f}")
if within_avg > cross_avg + 0.05:
    print("✓ Model is learning category-specific patterns")
else:
    print("✗ Model is NOT distinguishing categories well internally")

In [None]:
# V2 Analysis - Prediction quality
# Walks the validation loader once, accumulating (a) a confusion matrix for the
# liquidation head over every (trade, size) cell and (b) histograms of the size
# index the model picks versus the labelled target index.

model.eval()

# Confusion counts for P(liquidation) thresholded at 0.5
liq_pred_vs_actual = dict.fromkeys(('true_pos', 'true_neg', 'false_pos', 'false_neg'), 0)
# Per-size-index counters
size_chosen_hist = dict.fromkeys(range(NUM_SIZES), 0)
size_optimal_hist = dict.fromkeys(range(NUM_SIZES), 0)

with torch.no_grad():
    for candles, equity, targets, liq_targets, return_targets in val_loader:
        # Move the batch onto the model's device
        candles, equity = candles.to(device), equity.to(device)
        liq_targets = liq_targets.to(device)
        return_targets = return_targets.to(device)

        p_liq, e_return = model(candles, equity)
        chosen_idx = model.pick_size(candles, equity, LIQ_THRESHOLD)

        # Hard liquidation prediction per cell, compared with the labels
        pred_liq = (p_liq > 0.5).float()
        said_liq, said_safe = pred_liq == 1, pred_liq == 0
        was_liq, was_safe = liq_targets == 1, liq_targets == 0

        tp = (said_liq & was_liq).sum().item()
        tn = (said_safe & was_safe).sum().item()
        fp = (said_liq & was_safe).sum().item()
        fn = (said_safe & was_liq).sum().item()

        batch_counts = {'true_pos': tp, 'true_neg': tn, 'false_pos': fp, 'false_neg': fn}
        for outcome, count in batch_counts.items():
            liq_pred_vs_actual[outcome] += count

        # Size distribution: model's pick and labelled target, one count per trade
        for hist, indices in ((size_chosen_hist, chosen_idx), (size_optimal_hist, targets)):
            for idx in indices:
                hist[idx.item()] += 1

# Liquidation prediction stats
# Each confusion-matrix cell is printed as a raw count and as a share of all cells.
total_liq_preds = sum(liq_pred_vs_actual.values())
print("Liquidation Prediction Quality (across all 30 sizes):")
for row_title, outcome, note in (
    ('True Positive:', 'true_pos', ''),
    ('True Negative:', 'true_neg', ''),
    ('False Positive:', 'false_pos', ''),
    ('False Negative:', 'false_neg', ' <- DANGEROUS'),
):
    outcome_count = liq_pred_vs_actual[outcome]
    print(f"  {row_title:<15} {outcome_count:7d} ({outcome_count/total_liq_preds*100:5.1f}%){note}")

# Safety metrics
# Recall and precision are only reported when their denominators are non-zero.
actual_liq_count = liq_pred_vs_actual['true_pos'] + liq_pred_vs_actual['false_neg']
predicted_liq_count = liq_pred_vs_actual['true_pos'] + liq_pred_vs_actual['false_pos']
if actual_liq_count > 0:
    recall = liq_pred_vs_actual['true_pos'] / actual_liq_count
    print(f"\n  Liquidation Recall: {recall*100:.1f}% (ability to detect actual liquidations)")
if predicted_liq_count > 0:
    precision = liq_pred_vs_actual['true_pos'] / predicted_liq_count
    print(f"  Liquidation Precision: {precision*100:.1f}% (accuracy when predicting liquidation)")

# Plot size distributions
# Side-by-side bars per size index: how often the model chose it vs. how often
# it was the labelled target.
x = np.arange(NUM_SIZES)
width = 0.35

chosen_counts = list(map(size_chosen_hist.__getitem__, range(NUM_SIZES)))
optimal_counts = list(map(size_optimal_hist.__getitem__, range(NUM_SIZES)))

fig, ax = plt.subplots(figsize=(14, 5))

half_width = width / 2
ax.bar(x - half_width, chosen_counts, width, label='Model Chosen', alpha=0.7)
ax.bar(x + half_width, optimal_counts, width, label='Optimal', alpha=0.7)

ax.set(xlabel='Size Index', ylabel='Count', title='Size Distribution: Model vs Optimal')

# Label only every 3rd position, using the actual size value as the tick text
tick_positions = x[::3]
ax.set_xticks(tick_positions)
ax.set_xticklabels([f"{SIZES[i]:.2f}" for i in tick_positions], rotation=45)
ax.legend()

plt.tight_layout()
plt.show()

# Category distribution
# Collapse the per-size histograms into the small/mid/big buckets given by get_category.
category_names = ('small', 'mid', 'big')
cat_chosen = dict.fromkeys(category_names, 0)
cat_optimal = dict.fromkeys(category_names, 0)
for i in range(NUM_SIZES):
    size = SIZES[i]
    cat = get_category(size)
    cat_chosen[cat] = cat_chosen[cat] + size_chosen_hist[i]
    cat_optimal[cat] = cat_optimal[cat] + size_optimal_hist[i]

# Fixed-width three-column table
rule = '-' * 8
print("\nCategory Distribution:")
print(f"  {'Category':<8} | {'Chosen':>8} | {'Optimal':>8}")
print(f"  {rule} | {rule} | {rule}")
for cat in category_names:
    chosen_total, optimal_total = cat_chosen[cat], cat_optimal[cat]
    print(f"  {cat:<8} | {chosen_total:>8} | {optimal_total:>8}")

In [None]:
# Save final checkpoint
final_ckpt_name = f'final_epoch_{EPOCHS}.pt'
save_checkpoint(model, optimizer, scheduler, EPOCHS, history, metrics, best_regret, final_ckpt_name)
print(f"Final checkpoint saved: {final_ckpt_name}")

# List all V2 checkpoints
print(f"\nAll V2 checkpoints in {CHECKPOINT_DIR}:")
for f in sorted(os.listdir(CHECKPOINT_DIR)):
    if not f.endswith('.pt'):
        continue
    # Only the metadata is read here, so load onto the CPU instead of occupying
    # accelerator memory with every checkpoint's tensors.
    # NOTE: weights_only=False unpickles arbitrary objects - only use this on
    # checkpoints written by this notebook, never on untrusted files.
    ckpt = torch.load(os.path.join(CHECKPOINT_DIR, f), map_location='cpu', weights_only=False)
    epoch = ckpt.get('epoch', '?')
    # 'metrics' may be absent or stored as None; treat both as "no metrics"
    ckpt_metrics = ckpt.get('metrics') or {}
    regret = ckpt_metrics.get('regret')
    liq_rate = ckpt_metrics.get('liquidation_rate')

    summary = f"  {f}: epoch {epoch}"
    if regret is not None:
        summary += f", regret {regret:.2f}%"
        # Format the liquidation rate only when present; formatting None with
        # ':.1f' would raise TypeError and abort the listing.
        if liq_rate is not None:
            summary += f", liq {liq_rate:.1f}%"
    print(summary)

print(f"\nBest model saved as 'best.pt' with regret {best_regret:.2f}%")
print("To resume: set LOAD_FROM = 'best.pt'")

In [None]:
# Test on random trades - V2 with detailed output
# Prints one table row per randomly drawn validation trade (drawn with replacement).
print("Sample V2 predictions:")
header_columns = (('', 4), ('Optimal', 8), ('Chosen', 8), ('P(liq)', 7),
                  ('E[ret]', 7), ('Actual', 8), ('Cat', 6))
print(" | ".join(f"{title:>{col_width}}" for title, col_width in header_columns))
print("-" * 70)

model.eval()
for row_num in range(1, 11):
    trade = random.choice(val_trades)

    # Prepare input: normalize, then zero-pad short sequences up to max_len rows
    candles_norm = normalize_candles_v2(trade['candles'])
    rows_missing = max_len - candles_norm.shape[0]
    if rows_missing > 0:
        zero_rows = np.zeros((rows_missing, INPUT_FEATURES))
        candles_norm = np.vstack([candles_norm, zero_rows])

    # Batch of one
    candles_t = torch.FloatTensor(candles_norm).unsqueeze(0).to(device)
    equity_t = torch.FloatTensor([[EQUITY / 1000]]).to(device)

    with torch.no_grad():
        p_liq, e_return = model(candles_t, equity_t)
        chosen_idx = model.pick_size(candles_t, equity_t, LIQ_THRESHOLD).item()

    optimal_size = trade['optimal_size']
    optimal_idx = SIZES.index(optimal_size)
    chosen_size = SIZES[chosen_idx]

    # Get actual outcome for chosen size; a liquidation is reported as a flat -70
    all_results = trade['all_results']
    chosen_result = all_results[chosen_idx]
    if chosen_result['liquidated']:
        actual_ret = -70
    else:
        actual_ret = chosen_result['return_pct']

    # "ok" when the chosen size falls in the same category as the trade's label
    same_category = get_category(chosen_size) == trade['category']
    match = "ok" if same_category else "MISS"

    # Model outputs at the chosen size index
    chosen_p_liq = p_liq[0, chosen_idx].item()
    chosen_e_ret = e_return[0, chosen_idx].item()

    row_cells = (
        f"{row_num:>4}",
        f"{optimal_size:>8.2f}",
        f"{chosen_size:>8.2f}",
        f"{chosen_p_liq:>6.1%}",
        f"{chosen_e_ret:>6.1f}%",
        f"{actual_ret:>7.1f}%",
        f"{match:>6}",
    )
    print(" | ".join(row_cells))