## 1. Mount Google Drive

In [1]:
from google.colab import drive
# Attach the user's Google Drive to the Colab VM; later cells read the repository
# archive from it and write checkpoint backups to it.
drive.mount(mountpoint='/content/drive')
print("‚úì Google Drive mounted!")

ValueError: mount failed

## 2. Extract Repository

In [None]:
import zipfile
from pathlib import Path

# Archive uploaded by the user, and the directory the rest of the notebook
# expects the repository to live in.
zip_path = Path('/content/drive/MyDrive/Sequence.zip')
extract_to = Path('/content/Sequence')

if not zip_path.exists():
    print(f"‚ùå ERROR: {zip_path} not found")
    print("Upload Sequence.zip to your Google Drive root (MyDrive)")
else:
    print(f"Extracting {zip_path}...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall('/content')

    # Handle possible directory naming: the archive's top-level folder may not
    # be called exactly "Sequence". iterdir() yields entries in arbitrary
    # order, so sort to make the choice deterministic.
    extracted = sorted(
        d for d in Path('/content').iterdir() if d.is_dir() and 'Sequence' in d.name
    )
    # Only rename when the target does not exist yet: renaming another
    # directory onto an existing, non-empty /content/Sequence (e.g. when this
    # cell is re-run) would raise instead of doing anything useful.
    if extracted and extract_to not in extracted:
        extracted[0].rename(extract_to)

    # Report success only if the directory later cells rely on is really there.
    if extract_to.is_dir():
        print("‚úì Repository extracted!")
    else:
        print(f"WARNING: archive was extracted but {extract_to} does not exist")

## 3. Setup Paths (Colab-Specific)

In [None]:
import sys
from pathlib import Path

# Repository root inside the Colab VM; every project path below hangs off it.
ROOT = Path('/content/Sequence')

# Prepend ROOT first and ROOT/run second, so ROOT/run ends up at the very
# front of the import search path.
for import_dir in (ROOT, ROOT / 'run'):
    sys.path.insert(0, str(import_dir))

# Create necessary directories
for dir_parts in (('data', 'data'), ('data', 'raw'), ('models', 'checkpoints')):
    ROOT.joinpath(*dir_parts).mkdir(parents=True, exist_ok=True)

print(f"‚úì Root: {ROOT}")

## 4. Install Dependencies

In [None]:
# Install PyTorch with CUDA support
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

# Install other dependencies
!pip install -q numpy pandas matplotlib seaborn scikit-learn tqdm
!pip install -q transformers backtesting ta histdata

print("‚úì All requirements installed!")

## 5. Configuration (Colab-Optimized)

⚠️ **Adjust these settings based on your needs and your Colab tier.**

In [None]:
# =============================================================================
# GOOGLE COLAB CONFIGURATION
# =============================================================================
# Start small for free tier limits (T4 GPU has ~15GB RAM)
# Both lists are iterated by the download cell; PAIRS_TO_DOWNLOAD additionally
# drives the data-preparation and training loops (one run per pair).
PAIRS_TO_DOWNLOAD = ['gbpusd', 'eurusd']  # Limit to 2-3 pairs initially
# Years are kept as strings and converted with int() when downloading.
YEARS_TO_DOWNLOAD = ['2023', '2024']      # 2 years of data

# Data settings
# T_IN is forwarded to prepare_dataset.py as --t-in and is also the window
# length (rows per input sequence) used when building the training datasets.
T_IN = 120
# T_OUT is only forwarded to prepare_dataset.py as --t-out.
# NOTE(review): presumably the prediction horizon — confirm in prepare_dataset.py.
T_OUT = 10
# Forwarded to prepare_dataset.py as --task-type.
TASK_TYPE = 'classification'
# When True, the flag --intrinsic-time is passed to prepare_dataset.py.
USE_INTRINSIC_TIME = False  # Set True for better performance (slower)
# When True, the flag --include-sentiment is passed to prepare_dataset.py.
INCLUDE_SENTIMENT = False   # Set True to add GDELT sentiment (much slower)

# Training mode
# Selects the branch taken by the multi-pair training loop; 'sac' is accepted
# there but only prints a "not implemented" notice.
TRAINING_MODE = "supervised"  # Options: 'supervised', 'sac'

# =============================================================================
# SUPERVISED LEARNING CONFIGURATION
# =============================================================================
# Settings for the supervised pipeline: network size, optimisation,
# dataset split ratios and checkpoint / early-stopping behaviour.
SUPERVISED_CONFIG = dict(
    # Model architecture
    hidden_size_lstm=128,
    num_layers_lstm=2,
    cnn_num_filters=64,
    attention_dim=128,
    dropout=0.2,
    num_classes=3,  # 3 for classification (down/neutral/up)
    # Training hyperparameters
    epochs=20,  # Reduced for Colab free tier time limits
    batch_size=32,  # Lowered for RAM
    learning_rate=1e-3,
    weight_decay=1e-5,
    grad_clip=1.0,
    use_amp=True,  # Enable for 2x faster training
    # Data splitting
    train_ratio=0.7,
    val_ratio=0.15,
    test_ratio=0.15,
    # Checkpointing
    checkpoint_dir="models/checkpoints",
    early_stop_patience=5,
    lr_scheduler_patience=3,
    lr_scheduler_factor=0.5,
)

# =============================================================================
# SAC (REINFORCEMENT LEARNING) CONFIGURATION
# =============================================================================
# Settings for the SAC training mode: agent, training schedule,
# trading environment and checkpointing.
SAC_CONFIG = dict(
    # Agent hyperparameters
    hidden_dim=256,
    learning_rate=3e-4,
    gamma=0.99,
    tau=0.005,
    alpha=0.2,
    auto_entropy_tuning=True,
    # Training configuration
    total_steps=10000,  # Lowered for Colab
    batch_size=128,
    replay_buffer_size=10000,
    warmup_steps=500,
    update_interval=1,
    eval_interval=2000,
    eval_episodes=5,
    # Environment configuration
    initial_cash=50000.0,
    time_horizon=100,
    commission_pct=0.0001,
    spread_bps=1.0,
    reward_type="incremental_pnl",
    reward_scaling=1.0,
    # Checkpointing
    checkpoint_dir="models/sac_checkpoints",
    save_interval=5000,
)

# Echo the active configuration so the choice is visible in the cell output.
print(
    f"Training mode: {TRAINING_MODE.upper()}",
    f"Pairs to download: {PAIRS_TO_DOWNLOAD}",
    f"Years: {YEARS_TO_DOWNLOAD}",
    sep="\n",
)

## 6. GPU Check

In [None]:
import torch

# Report the PyTorch build and, if CUDA is usable, the details of every
# visible GPU; otherwise tell the user how to enable one.
banner = "=" * 60
print(banner)
print("GPU Environment Check")
print(banner)
print(f"PyTorch version: {torch.__version__}")

cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")

if not cuda_ready:
    print("\n‚ö†Ô∏è  No GPU detected. Training will use CPU.")
    print("For best performance: Runtime ‚Üí Change runtime type ‚Üí GPU")
else:
    gpu_total = torch.cuda.device_count()
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU count: {gpu_total}")
    for gpu_index in range(gpu_total):
        gpu = torch.cuda.get_device_properties(gpu_index)
        print(f"  GPU {gpu_index}: {gpu.name}")
        # total_memory is reported in bytes; convert to GiB for display.
        print(f"    Memory: {gpu.total_memory / 1024**3:.2f} GB")
        print(f"    Compute Capability: {gpu.major}.{gpu.minor}")
print(banner)

## 7. Data Collection from HistData

This cell downloads historical FX data from HistData.com. It may take several minutes depending on the number of pairs and years.

In [None]:
from histdata.api import download_hist_data
from pathlib import Path

print("üì• Downloading Historical FX Data from HistData.com")
print("="*70)
print(f"Pairs: {', '.join([p.upper() for p in PAIRS_TO_DOWNLOAD])}")
print(f"Years: {', '.join(YEARS_TO_DOWNLOAD)}")
print("="*70 + "\n")

raw_data_dir = ROOT / 'data' / 'raw'
raw_data_dir.mkdir(parents=True, exist_ok=True)

# 'successful' counts downloaded archives (one per full year, or one per month
# in the month-by-month fallback); 'failed' counts pair/year combinations for
# which nothing could be downloaded.
download_stats = {'successful': 0, 'failed': 0}

for pair in PAIRS_TO_DOWNLOAD:
    print(f"\n{'‚îÄ'*70}")
    print(f"üìä Pair: {pair.upper()}")
    print(f"{'‚îÄ'*70}")

    # Each pair gets its own sub-directory under data/raw.
    pair_dir = raw_data_dir / pair
    pair_dir.mkdir(parents=True, exist_ok=True)

    for year in YEARS_TO_DOWNLOAD:
        try:
            print(f"  {year}... ", end='', flush=True)

            # Try downloading full year first
            try:
                result = download_hist_data(
                    year=int(year),
                    pair=pair,
                    output_directory=str(pair_dir),
                    verbose=False
                )
                print(f"‚úì {result}")
                download_stats['successful'] += 1

            except AssertionError:
                # Download month-by-month if full year not available
                print("(month-by-month)")
                months_ok = 0
                last_month_error = None
                for month in range(1, 13):
                    try:
                        download_hist_data(
                            year=int(year),
                            month=month,
                            pair=pair,
                            output_directory=str(pair_dir),
                            verbose=False
                        )
                    except Exception as month_error:
                        # Individual months may legitimately be unavailable, so
                        # keep going. `Exception` (not a bare `except`) lets
                        # Ctrl-C / kernel interrupts still stop the loop.
                        last_month_error = month_error
                    else:
                        months_ok += 1
                        download_stats['successful'] += 1
                print(f"    ‚úì {months_ok} months downloaded")
                if months_ok == 0:
                    # Nothing at all for this pair/year: surface it as a failure
                    # instead of silently reporting "0 months downloaded".
                    download_stats['failed'] += 1
                    print(f"    no data downloaded, last error: {str(last_month_error)[:50]}")

        except Exception as e:
            print(f"‚úó {str(e)[:50]}")
            download_stats['failed'] += 1

print(f"\n{'='*70}")
print(f"‚úì Successful: {download_stats['successful']}")
print(f"‚úó Failed: {download_stats['failed']}")
print("‚úì Data collection complete!")
print(f"{'='*70}")

## 8. Data Preparation

This cell processes raw FX data and creates features for training.

In [None]:
import subprocess

# Local import so this cell does not depend on an earlier cell having imported sys.
import sys

print("Starting data preparation pipeline...\n")

# Pairs whose preparation script exited with a non-zero status.
failed_pairs = []

for pair in PAIRS_TO_DOWNLOAD:
    print(f"\nProcessing {pair.upper()}...")

    # Run the repository's preparation script once per pair. sys.executable
    # guarantees the script runs under the same interpreter (and therefore the
    # same installed packages) as this kernel, whatever `python` is on PATH.
    cmd = [
        sys.executable, str(ROOT / 'data' / 'prepare_dataset.py'),
        '--pairs', pair,
        '--t-in', str(T_IN),
        '--t-out', str(T_OUT),
        '--task-type', TASK_TYPE,
    ]

    # Optional feature flags, controlled from the configuration cell.
    if USE_INTRINSIC_TIME:
        cmd.append('--intrinsic-time')
    if INCLUDE_SENTIMENT:
        cmd.append('--include-sentiment')

    # Output is captured rather than streamed; it is only shown on failure.
    result = subprocess.run(cmd, capture_output=True, text=True, cwd=str(ROOT))

    if result.returncode == 0:
        print(f"  ‚úì {pair.upper()} prepared")
    else:
        failed_pairs.append(pair)
        # Fall back to stdout when the script wrote nothing to stderr.
        print(f"  ‚úó Failed: {result.stderr or result.stdout}")

# Only claim completion when every pair was actually prepared.
if failed_pairs:
    print(f"\nData preparation finished with failures: {', '.join(p.upper() for p in failed_pairs)}")
else:
    print("\n‚úì Data preparation complete!")

## 9. Core Imports

In [None]:
import sys
from pathlib import Path
import time

# Ensure paths are set
ROOT = Path('/content/Sequence')
# Prepend ROOT and then ROOT/run (so ROOT/run is searched first), but skip
# entries that are already present: this cell may run after the setup cell or
# be re-run, and unconditional inserts would keep growing sys.path.
for _import_dir in (ROOT, ROOT / 'run'):
    if str(_import_dir) not in sys.path:
        sys.path.insert(0, str(_import_dir))

# Core imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm

# Project imports
from config.config import ModelConfig
from utils.multi_gpu import setup_multi_gpu
from models.agent_hybrid import HybridCNNLSTMAttention

# Visualization settings
# NOTE(review): the 'seaborn-v0_8-*' style names presumably only exist in
# recent matplotlib releases — confirm against the installed version.
plt.style.use('seaborn-v0_8-darkgrid')
# Default seaborn colour cycle for all subsequent plots.
sns.set_palette('husl')
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

print("‚úì Imports complete!")

## 10. Helper Functions

In [None]:
def get_unwrapped_model(model):
    """Return the bare network behind an optional ``nn.DataParallel`` wrapper.

    Args:
        model: A module, possibly wrapped in ``nn.DataParallel``.

    Returns:
        ``model.module`` when ``model`` is an ``nn.DataParallel`` instance,
        otherwise ``model`` itself (unchanged).
    """
    if isinstance(model, nn.DataParallel):
        return model.module
    return model


def format_time(seconds):
    """Render a duration in seconds as ``"Hh Mm Ss"``, ``"Mm Ss"`` or ``"Ss"``.

    Leading units that are zero are dropped; fractional seconds are truncated.

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        The formatted duration string.
    """
    whole_hours, within_hour = divmod(seconds, 3600)
    hours = int(whole_hours)
    minutes = int(within_hour // 60)
    secs = int(seconds % 60)

    if hours > 0:
        return f"{hours}h {minutes}m {secs}s"
    if minutes > 0:
        return f"{minutes}m {secs}s"
    return f"{secs}s"


class SequenceDataset(Dataset):
    """Sliding-window PyTorch Dataset for time series sequences.

    Sample ``i`` is the window ``features[i : i + t_in]`` paired with
    ``targets[i + t_in]``, i.e. the target of the row that immediately
    follows the window.

    Args:
        features: Array-like with one row per time step; stored as float32.
        targets: Array-like with one entry per row. Any integer dtype is
            stored as int64 (class indices); everything else as float32.
        t_in: Number of consecutive rows in each input window.
    """

    def __init__(self, features, targets, t_in):
        features = np.asarray(features)
        targets = np.asarray(targets)
        self.features = torch.tensor(features, dtype=torch.float32)
        # Treat every integer dtype (int32, uint8, ...) as class labels, not
        # just int64; otherwise such labels would silently become floats.
        if np.issubdtype(targets.dtype, np.integer):
            self.targets = torch.tensor(targets, dtype=torch.int64)
        else:
            self.targets = torch.tensor(targets, dtype=torch.float32)
        self.t_in = t_in

    def __len__(self):
        # Clamp at zero: with fewer rows than one window the difference is
        # negative, which len() rejects with a confusing ValueError.
        return max(0, len(self.features) - self.t_in)

    def __getitem__(self, idx):
        x = self.features[idx:idx + self.t_in]
        y = self.targets[idx + self.t_in]
        return x, y

# Status message confirming that the helper definitions in this cell ran.
print("‚úì Helper functions defined!")

## 11. Supervised Learning Functions

In [None]:
def train_epoch_supervised(model, loader, criterion, optimizer, device, scaler=None, grad_clip=None):
    """Train for one epoch (supervised learning).

    Args:
        model: Network to optimise; called as ``model(x)``.
        loader: Sized iterable of ``(x, y)`` batches.
        criterion: Loss callable, ``criterion(outputs, y)``.
        optimizer: Optimizer updating ``model``'s parameters.
        device: Device the batches are moved to.
        scaler: Optional ``GradScaler``; when given, the forward pass runs
            under CUDA autocast and the backward pass uses loss scaling.
        grad_clip: Optional max gradient norm; falsy values disable clipping.

    Returns:
        Tuple ``(mean batch loss, accuracy in percent)``.

    Raises:
        ValueError: If ``loader`` contains no batches.
    """
    # Fail early with a clear message instead of a ZeroDivisionError at the
    # end of the epoch (e.g. when a split is shorter than one input window).
    if len(loader) == 0:
        raise ValueError("train_epoch_supervised: loader contains no batches")

    model.train()
    total_loss = 0
    correct = 0
    total = 0

    for x, y in tqdm(loader, desc="Training", leave=False):
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()

        if scaler:
            # Mixed-precision path: scaled backward, optional clipping on
            # unscaled gradients, then the scaler-managed optimizer step.
            with torch.cuda.amp.autocast():
                outputs = model(x)
                loss = criterion(outputs, y)
            scaler.scale(loss).backward()
            if grad_clip:
                # Gradients must be unscaled before their norm is clipped.
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            scaler.step(optimizer)
            scaler.update()
        else:
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            if grad_clip:
                torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            optimizer.step()

        total_loss += loss.item()
        # Predicted class = index of the largest output along dim 1.
        _, predicted = outputs.max(1)
        total += y.size(0)
        correct += predicted.eq(y).sum().item()

    # Loss is averaged over batches, accuracy over samples.
    return total_loss / len(loader), 100. * correct / total


def validate_supervised(model, loader, criterion, device):
    """Validate the model (supervised learning) without updating weights.

    Args:
        model: Network to evaluate; called as ``model(x)``.
        loader: Sized iterable of ``(x, y)`` batches.
        criterion: Loss callable, ``criterion(outputs, y)``.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean batch loss, accuracy in percent)``.

    Raises:
        ValueError: If ``loader`` contains no batches.
    """
    # Fail early with a clear message instead of a ZeroDivisionError at the
    # end (e.g. when a split is shorter than one input window).
    if len(loader) == 0:
        raise ValueError("validate_supervised: loader contains no batches")

    model.eval()
    total_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for x, y in tqdm(loader, desc="Validating", leave=False):
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            loss = criterion(outputs, y)

            total_loss += loss.item()
            # Predicted class = index of the largest output along dim 1.
            _, predicted = outputs.max(1)
            total += y.size(0)
            correct += predicted.eq(y).sum().item()

    # Loss is averaged over batches, accuracy over samples.
    return total_loss / len(loader), 100. * correct / total


def train_supervised(pair, config):
    """Complete supervised training pipeline for a single pair.

    Loads ``<ROOT>/data/data/<pair>/<pair>_prepared.csv``, splits the rows by
    position (no shuffling) into train/validation/test, trains a
    ``HybridCNNLSTMAttention`` model with Adam and ``ReduceLROnPlateau``,
    saves the checkpoint with the lowest validation loss, stops early when
    validation loss stalls, and finally evaluates that checkpoint on the
    test split.

    Args:
        pair: Currency-pair identifier used in the data and checkpoint paths.
        config: Dict with the keys of ``SUPERVISED_CONFIG`` (model sizes,
            optimisation settings, split ratios, checkpoint settings).

    Returns:
        Dict with keys ``pair``, ``best_val_loss``, ``test_loss``,
        ``test_acc``, ``epochs_trained``, ``time_elapsed`` (seconds) and
        ``checkpoint_path``.

    Raises:
        RuntimeError: If validation loss never improved, so no checkpoint
            was written during this run.
    """
    print(f"\n{'='*80}")
    print(f"SUPERVISED TRAINING: {pair.upper()}")
    print(f"{'='*80}\n")

    start_time = time.time()

    # Load data
    data_path = ROOT / 'data' / 'data' / pair / f'{pair}_prepared.csv'
    print(f"Loading data from: {data_path}")
    df = pd.read_csv(data_path)
    print(f"‚úì Loaded {len(df):,} rows\n")

    # Separate features and targets: the target column is 'target' when
    # present, otherwise 'label'; time-like columns are not used as features.
    target_col = 'target' if 'target' in df.columns else 'label'
    feature_cols = [col for col in df.columns if col not in [target_col, 'timestamp', 'date', 'time']]
    features = df[feature_cols].values
    targets = df[target_col].values

    # Split data by row position. 'test_ratio' is only used for the printout
    # below; the test split is simply whatever remains after train + val.
    n_samples = len(features)
    train_end = int(n_samples * config['train_ratio'])
    val_end = int(n_samples * (config['train_ratio'] + config['val_ratio']))

    train_features, train_targets = features[:train_end], targets[:train_end]
    val_features, val_targets = features[train_end:val_end], targets[train_end:val_end]
    test_features, test_targets = features[val_end:], targets[val_end:]

    print("Dataset split:")
    print(f"  Train: {len(train_features):,} samples ({config['train_ratio']*100:.0f}%)")
    print(f"  Val:   {len(val_features):,} samples ({config['val_ratio']*100:.0f}%)")
    print(f"  Test:  {len(test_features):,} samples ({config['test_ratio']*100:.0f}%)\n")

    # Create datasets; each split is windowed separately, so no window spans
    # two splits.
    train_dataset = SequenceDataset(train_features, train_targets, T_IN)
    val_dataset = SequenceDataset(val_features, val_targets, T_IN)
    test_dataset = SequenceDataset(test_features, test_targets, T_IN)

    # Pinned host memory only helps (and is only supported) when batches are
    # copied to a CUDA device; on CPU-only runtimes it just triggers a warning.
    loader_kwargs = dict(
        batch_size=config['batch_size'],
        num_workers=2,
        pin_memory=torch.cuda.is_available(),
    )
    train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
    val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)
    test_loader = DataLoader(test_dataset, shuffle=False, **loader_kwargs)

    # Create model
    model_config = ModelConfig(
        num_features=len(feature_cols),
        hidden_size_lstm=config['hidden_size_lstm'],
        num_layers_lstm=config['num_layers_lstm'],
        cnn_num_filters=config['cnn_num_filters'],
        attention_dim=config['attention_dim'],
        dropout=config['dropout'],
        num_classes=config['num_classes'],
        use_optimized_attention=False
    )

    model = HybridCNNLSTMAttention(model_config)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model, device = setup_multi_gpu(model, device)

    total_params = sum(p.numel() for p in model.parameters())
    print(f"Model: HybridCNNLSTMAttention ({total_params:,} parameters)")
    print(f"Device: {device}\n")

    # Training setup
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'], weight_decay=config['weight_decay'])
    # `verbose` is intentionally not passed: it defaults to silent, and the
    # keyword is deprecated in recent PyTorch releases.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=config['lr_scheduler_factor'],
        patience=config['lr_scheduler_patience']
    )
    # Mixed precision is only enabled when requested AND a CUDA device exists.
    scaler = torch.cuda.amp.GradScaler() if config['use_amp'] and torch.cuda.is_available() else None

    # Checkpoint directory
    checkpoint_dir = ROOT / config['checkpoint_dir']
    checkpoint_dir.mkdir(parents=True, exist_ok=True)
    checkpoint_path = checkpoint_dir / f'{pair}_best_model.pt'

    # Training loop
    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': [], 'lr': []}
    best_val_loss = float('inf')
    best_epoch = None  # epoch of the checkpoint written during THIS run, if any
    patience_counter = 0

    print(f"Starting training for {config['epochs']} epochs...\n")

    for epoch in range(1, config['epochs'] + 1):
        train_loss, train_acc = train_epoch_supervised(
            model, train_loader, criterion, optimizer, device, scaler, config['grad_clip']
        )
        val_loss, val_acc = validate_supervised(model, val_loader, criterion, device)
        scheduler.step(val_loss)
        current_lr = optimizer.param_groups[0]['lr']

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        history['lr'].append(current_lr)

        # Log the first epoch and then every fifth one.
        if epoch % 5 == 0 or epoch == 1:
            print(f"Epoch {epoch:3d}/{config['epochs']} | "
                  f"Train Loss: {train_loss:.4f} Acc: {train_acc:.2f}% | "
                  f"Val Loss: {val_loss:.4f} Acc: {val_acc:.2f}% | "
                  f"LR: {current_lr:.2e}")

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_epoch = epoch
            patience_counter = 0
            # Save the unwrapped weights so the checkpoint loads with or
            # without a DataParallel wrapper.
            torch.save({
                'epoch': epoch,
                'model_state_dict': get_unwrapped_model(model).state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'val_loss': val_loss,
                'val_acc': val_acc,
                'model_config': model_config.__dict__,
                'pair': pair,
                'history': history
            }, checkpoint_path)
        else:
            patience_counter += 1

        if patience_counter >= config['early_stop_patience']:
            print(f"\nEarly stopping at epoch {epoch}")
            break

    # Without an improvement (e.g. NaN validation loss) nothing was saved in
    # this run; loading would either fail or pick up a stale checkpoint from
    # an earlier session and report misleading test metrics.
    if best_epoch is None:
        raise RuntimeError(
            f"No checkpoint was saved for {pair}: validation loss never improved "
            f"(last val_loss={val_loss})"
        )

    # Test evaluation on the best checkpoint, not on the last epoch's weights.
    checkpoint = torch.load(checkpoint_path)
    get_unwrapped_model(model).load_state_dict(checkpoint['model_state_dict'])
    test_loss, test_acc = validate_supervised(model, test_loader, criterion, device)

    elapsed_time = time.time() - start_time

    print(f"\n{'='*80}")
    print(f"TRAINING COMPLETE: {pair.upper()}")
    print(f"{'='*80}")
    print(f"Time elapsed: {format_time(elapsed_time)}")
    print(f"Best val loss: {best_val_loss:.4f} (epoch {checkpoint['epoch']})")
    print(f"Test loss: {test_loss:.4f}")
    print(f"Test accuracy: {test_acc:.2f}%")
    print(f"Checkpoint: {checkpoint_path}")
    print(f"{'='*80}\n")

    return {
        'pair': pair,
        'best_val_loss': best_val_loss,
        'test_loss': test_loss,
        'test_acc': test_acc,
        'epochs_trained': len(history['train_loss']),
        'time_elapsed': elapsed_time,
        'checkpoint_path': str(checkpoint_path)
    }

# Status message confirming that the supervised-learning functions in this cell were defined.
print("‚úì Supervised learning functions defined!")

## 12. Multi-Pair Training Loop

This cell trains models sequentially for all configured pairs.

In [None]:
# Track results for all pairs: one dict per pair, either the metrics returned
# by train_supervised or {'pair': ..., 'error': ...} when the pair did not train.
all_results = []

print(f"\n{'#'*80}")
print("MULTI-PAIR TRAINING SESSION")
print(f"{'#'*80}")
print(f"Mode: {TRAINING_MODE.upper()}")
print(f"Pairs: {', '.join([p.upper() for p in PAIRS_TO_DOWNLOAD])}")
print(f"Total pairs: {len(PAIRS_TO_DOWNLOAD)}")
print(f"{'#'*80}\n")

session_start_time = time.time()

for idx, pair in enumerate(PAIRS_TO_DOWNLOAD, 1):
    print(f"\n[{idx}/{len(PAIRS_TO_DOWNLOAD)}] Processing {pair.upper()}...")

    # One failing pair must not abort the whole session, so every pair is
    # trained inside its own try/except and failures are recorded.
    try:
        if TRAINING_MODE == 'supervised':
            result = train_supervised(pair, SUPERVISED_CONFIG)
        elif TRAINING_MODE == 'sac':
            print("‚ö†Ô∏è  SAC training not implemented in this notebook yet.")
            print("   For SAC, use the RL training scripts or add SAC functions.")
            result = {'pair': pair, 'error': 'SAC not implemented'}
        else:
            raise ValueError(f"Unknown training mode: {TRAINING_MODE}")

        all_results.append(result)
        # A result carrying an 'error' key (the SAC placeholder) is not a
        # success and must not be reported as one.
        if 'error' in result:
            print(f"{pair.upper()} skipped: {result['error']}")
        else:
            print(f"‚úì {pair.upper()} completed successfully")

    except Exception as e:
        print(f"‚úó {pair.upper()} failed: {e}")
        all_results.append({
            'pair': pair,
            'error': str(e)
        })

    # Show remaining pairs
    if idx < len(PAIRS_TO_DOWNLOAD):
        remaining = PAIRS_TO_DOWNLOAD[idx:]
        print(f"\nRemaining pairs: {', '.join([p.upper() for p in remaining])}")

session_elapsed_time = time.time() - session_start_time
# Every pair yields an entry in all_results, so count the successes
# separately to make failures visible in the summary.
succeeded_count = sum(1 for r in all_results if 'error' not in r)

print(f"\n{'#'*80}")
print("ALL TRAINING COMPLETE")
print(f"{'#'*80}")
print(f"Total session time: {format_time(session_elapsed_time)}")
print(f"Pairs processed: {len(all_results)}/{len(PAIRS_TO_DOWNLOAD)}")
print(f"Pairs succeeded: {succeeded_count}/{len(all_results)}")
print(f"{'#'*80}\n")

## 13. Results Visualization

In [None]:
if TRAINING_MODE == 'supervised':
    # Plot test accuracy comparison; entries without 'test_acc' are pairs
    # that failed or were skipped.
    successful_results = [r for r in all_results if 'test_acc' in r]

    if successful_results:
        pairs = [r['pair'].upper() for r in successful_results]
        test_accs = [r['test_acc'] for r in successful_results]
        times = [r['time_elapsed'] / 60 for r in successful_results]  # Convert to minutes

        # Accuracy of uniform random guessing depends on the number of
        # classes (33.3% for 3 classes), so derive it from the configuration
        # instead of assuming a binary problem.
        chance_level = 100.0 / SUPERVISED_CONFIG['num_classes']

        fig, axes = plt.subplots(1, 2, figsize=(14, 5))

        # Test accuracy
        axes[0].bar(pairs, test_accs, color='steelblue', alpha=0.8)
        axes[0].set_ylabel('Test Accuracy (%)')
        axes[0].set_title('Test Accuracy by Pair')
        axes[0].grid(True, alpha=0.3, axis='y')
        axes[0].axhline(y=chance_level, color='red', linestyle='--', alpha=0.5,
                        label=f'Random baseline ({chance_level:.1f}%)')
        axes[0].legend()

        # Training time
        axes[1].bar(pairs, times, color='coral', alpha=0.8)
        axes[1].set_ylabel('Training Time (minutes)')
        axes[1].set_title('Training Time by Pair')
        axes[1].grid(True, alpha=0.3, axis='y')

        plt.tight_layout()
        plt.show()

        # Print summary table
        print("\n" + "="*60)
        print("TRAINING SUMMARY")
        print("="*60)
        for r in successful_results:
            print(f"{r['pair'].upper():8s} | Acc: {r['test_acc']:6.2f}% | Time: {r['time_elapsed']/60:5.1f}m")
        print("="*60)
    else:
        print("No successful results to visualize.")
elif TRAINING_MODE == 'sac':
    print("SAC visualization not implemented. Add SAC evaluation metrics for visualization.")

## 14. Backup Checkpoints to Google Drive

This ensures your trained models persist even if the Colab session ends.

In [None]:
import shutil

# Root of the mounted Drive and the backup folder inside it.
drive_root = Path('/content/drive/MyDrive')
drive_checkpoint_dir = drive_root / 'Sequence_Models'

checkpoint_dir = ROOT / 'models' / 'checkpoints'
backup_count = 0

if not drive_root.is_dir():
    # Without this check, mkdir(parents=True) would silently create a local
    # /content/drive/MyDrive folder on the VM's disk and the "backup" would
    # vanish with the session.
    print(f"ERROR: {drive_root} not found - Google Drive is not mounted.")
    print("Re-run the 'Mount Google Drive' cell, then re-run this cell.")
else:
    drive_checkpoint_dir.mkdir(parents=True, exist_ok=True)

    print("Backing up checkpoints to Google Drive...\n")

    # copy2 also preserves file metadata such as modification times.
    for checkpoint_file in sorted(checkpoint_dir.glob('*.pt')):
        shutil.copy2(checkpoint_file, drive_checkpoint_dir / checkpoint_file.name)
        print(f"‚úì {checkpoint_file.name}")
        backup_count += 1

    # Only claim a successful backup when something was actually copied.
    if backup_count:
        print(f"\n‚úì {backup_count} checkpoint(s) saved to: {drive_checkpoint_dir}")
        print("\nYour models are now safely backed up to Google Drive!")
    else:
        print(f"No checkpoints (*.pt) found in {checkpoint_dir}; nothing was backed up.")

---

## 🎉 Training Complete!

### Next Steps

1. **Download Checkpoints**: Access them from your Google Drive at `MyDrive/Sequence_Models/`
2. **Extend Training**: Add more pairs to `PAIRS_TO_DOWNLOAD` and re-run
3. **Evaluate**: Use the checkpoints for backtesting and live trading
4. **Experiment**: Try different hyperparameters or enable intrinsic time

### Troubleshooting

- **Out of memory**: Reduce `batch_size` or number of pairs
- **Session timeout**: Save checkpoints more frequently
- **Import errors**: Re-run the setup cells

### Resources

- Main documentation: `CLAUDE.md` in repository
- Colab guide: `notebooks/COLAB_SETUP_GUIDE.md`
- Training scripts: `run/training_pipeline.py`