# Model Development and Experimentation

This notebook demonstrates how to develop, train, and evaluate machine learning models for predicting cryptocurrency price movements using BTB.

In [1]:
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# import torch.optim as optim # Removed, handled by model classes
# from torch.utils.data import DataLoader, TensorDataset # Removed, handled by model classes
# from sklearn.preprocessing import StandardScaler # Removed, handled by DataPreprocessor
from sklearn.model_selection import train_test_split

# Add project root to path for imports
sys.path.append(os.path.abspath(".."))

from btb.data.preprocessing import DataPreprocessor  # Added
from btb.models.lstm import LSTMModel
from btb.models.transformer import TransformerModel
from btb.utils.config import load_config


fatal: bad revision 'HEAD'


## 1. Load Configuration and Processed Data

In [2]:
# Read the model settings used throughout the rest of the notebook
model_config_path = "../config/model_config.yaml"
config = load_config(model_config_path)
print("Model configuration loaded")

Model configuration loaded


In [3]:
# The processed-data file is named after the first symbol/timeframe listed in
# the backtest config, so read those values before building the path.
backtest_config = load_config("../config/backtest_config.yaml")
symbol = backtest_config["backtest"]["symbols"][0]
timeframe = backtest_config["backtest"]["timeframes"][0]

# Construct path and load the processed data
processed_data_path = f"../data/processed/{symbol}_{timeframe}_processed.csv"
print(f"Loading processed data from: {processed_data_path}")
try:
    df = pd.read_csv(processed_data_path, index_col=0, parse_dates=True)
except FileNotFoundError:
    print("ERROR: Processed data file not found. Please run notebook 1 first.")
    # Stop here: every later cell needs `df`, so carrying on would only fail
    # further down with a less helpful NameError.
    raise
print("Processed data loaded successfully.")

# Keep the preview as the cell's last expression so the notebook renders it
df.head()

Loading processed data from: ../data/processed/BTCUSDT_1h_processed.csv
Processed data loaded successfully.


## 2. Prepare Data for Time Series Modeling

In [4]:
# Preprocessor that turns the processed frame into model-ready sequences
preprocessor = DataPreprocessor()

# Sections of the model config that drive sequence/target construction
feature_settings = config["features"]
prediction_settings = config["prediction"]

seq_length = feature_settings["sequence_length"]
prediction_horizon = prediction_settings.get("prediction_horizon", 1)
target_column = "close"  # Column passed to the preprocessor as the target
# Classification targets are requested only when output_type says so
classification_mode = prediction_settings.get("output_type", "regression") == "classification"
# The configured confidence_threshold is reused as the classification threshold
classification_threshold = prediction_settings.get("confidence_threshold", 0.0)

# Note: df must contain the feature columns as well as target_column.
sequence_kwargs = {
    "data": df,
    "sequence_length": seq_length,
    "target_column": target_column,
    "prediction_horizon": prediction_horizon,
    "classification": classification_mode,
    "threshold": classification_threshold,
}
X, y = preprocessor.create_sequences(**sequence_kwargs)

# Report the resulting array shapes
print(f"Generated sequences - X shape: {X.shape}, y shape: {y.shape}")

Generated sequences - X shape: (8478, 60, 22), y shape: (8478,)


In [5]:
# Split into contiguous train / validation / test slices (no shuffling):
# 70% train, 15% validation, and whatever remains for test.
n_sequences = len(X)
train_size = int(n_sequences * 0.7)
val_size = int(n_sequences * 0.15)
test_size = n_sequences - train_size - val_size

# Index where the validation slice ends and the test slice begins
val_end = train_size + val_size

X_train, X_val, X_test = X[:train_size], X[train_size:val_end], X[val_end:]
y_train, y_val, y_test = y[:train_size], y[train_size:val_end], y[val_end:]

print(f"Train: {X_train.shape}, Validation: {X_val.shape}, Test: {X_test.shape}")

Train: (5934, 60, 22), Validation: (1271, 60, 22), Test: (1273, 60, 22)


In [6]:
# Convert each split to float32 PyTorch tensors.
# Inputs keep their shape; targets are reshaped into a single column because
# the model's train method expects tuples of (X, y) tensors.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

print("Data converted to PyTorch tensors.")
# No DataLoader is built here - batching is handled internally by model.train()

Data converted to PyTorch tensors.


## 3. Train and Evaluate LSTM Model

In [7]:
# Copy the "model" and "training" sections of model_config.yaml so the
# originals stay untouched; missing sections fall back to empty dicts.
lstm_model_config = config.get("model", {}).copy()
lstm_training_config = config.get("training", {}).copy()

# Architecture-related keys expected by the LSTMModel constructor
lstm_architecture_params = {
    "input_dim": X_train_tensor.shape[2],  # Size of the last axis of the input sequences
    "output_dim": 1,  # One predicted value per sequence
    "hidden_dim": lstm_model_config.get("hidden_dim", 128),
    "num_layers": lstm_model_config.get("num_layers", 2),
    "dropout": float(lstm_model_config.get("dropout", 0.1)),  # Coerce to float
}

# Optimisation-related keys, with fallbacks for anything the config omits
lstm_optimisation_params = {
    "learning_rate": float(lstm_training_config.get("learning_rate", 0.001)),  # Coerce to float
    "epochs": lstm_training_config.get("epochs", 10),
    "batch_size": lstm_training_config.get("batch_size", 64),
    "patience": lstm_training_config.get("patience", 15),
    "early_stopping": lstm_training_config.get("early_stopping", True),
    "optimizer": lstm_training_config.get("optimizer", "adam"),
    "scheduler": lstm_training_config.get("scheduler"),
    "weight_decay": float(lstm_training_config.get("weight_decay", 0)),  # Coerce to float
    "gradient_clipping": lstm_training_config.get("gradient_clipping"),
}

# Single flat dict handed to the model
lstm_combined_config = {**lstm_architecture_params, **lstm_optimisation_params}

# Build the model; optimizer and criterion are set up inside LSTMModel
print("Initializing LSTMModel...")
lstm_model = LSTMModel(config=lstm_combined_config)
print(f"LSTMModel initialized on device: {lstm_model.device}")

Initializing LSTMModel...
LSTMModel initialized on device: cpu


In [None]:
# Train the LSTM through its own train method, which returns a metrics mapping
print("Training LSTM model...")
lstm_train_metrics = lstm_model.train(
    train_data=(X_train_tensor, y_train_tensor), validation_data=(X_val_tensor, y_val_tensor)
)
print("LSTM training complete.")

# A metric may be absent from the returned mapping. Applying ":.6f" to an
# "N/A" fallback string raises ValueError, so only format values that exist.
for loss_label, loss_key in (
    ("Final LSTM Train Loss", "final_train_loss"),
    ("Final LSTM Val Loss", "final_val_loss"),
    ("Best LSTM Val Loss", "best_val_loss"),
):
    loss_value = lstm_train_metrics.get(loss_key)
    loss_text = "N/A" if loss_value is None else f"{loss_value:.6f}"
    print(f"{loss_label}: {loss_text}")

Training LSTM model...


In [None]:
# Draw whichever loss histories the training metrics contain
fig, ax = plt.subplots(figsize=(10, 6))
for history_key, curve_label in (("train_losses", "Train Loss"), ("val_losses", "Validation Loss")):
    if history_key in lstm_train_metrics:
        ax.plot(lstm_train_metrics[history_key], label=curve_label)
ax.set_title("LSTM Model Training")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
# Only add a legend when at least one curve was drawn
if "train_losses" in lstm_train_metrics or "val_losses" in lstm_train_metrics:
    ax.legend()
ax.grid(True)
plt.show()

## 4. Train and Evaluate Transformer Model

In [None]:
# Copy the "model" and "training" sections of model_config.yaml so the
# originals stay untouched; missing sections fall back to empty dicts.
transformer_model_config = config.get("model", {}).copy()
transformer_training_config = config.get("training", {}).copy()

# Architecture-related keys expected by the TransformerModel constructor
transformer_architecture_params = {
    "feature_dim": X_train_tensor.shape[2],  # Size of the last axis of the input sequences
    "output_dim": 1,  # One predicted value per sequence
    "d_model": transformer_model_config.get("hidden_dim", 128),  # hidden_dim doubles as d_model
    "nhead": transformer_model_config.get("attention_heads", 8),
    "num_encoder_layers": transformer_model_config.get("num_layers", 3),
    "dim_feedforward": transformer_model_config.get("dim_feedforward", 2048),  # Fallback if missing
    "dropout": float(transformer_model_config.get("dropout", 0.1)),  # Coerce to float
}

# Optimisation-related keys, with fallbacks for anything the config omits
transformer_optimisation_params = {
    "learning_rate": float(transformer_training_config.get("learning_rate", 0.001)),  # Coerce to float
    "epochs": transformer_training_config.get("epochs", 10),
    "batch_size": transformer_training_config.get("batch_size", 64),
    "patience": transformer_training_config.get("patience", 15),
    "early_stopping": transformer_training_config.get("early_stopping", True),
    "optimizer": transformer_training_config.get("optimizer", "adam"),
    "scheduler": transformer_training_config.get("scheduler"),
    "weight_decay": float(transformer_training_config.get("weight_decay", 0)),  # Coerce to float
    "gradient_clipping": transformer_training_config.get("gradient_clipping"),
}

# Single flat dict handed to the model
transformer_combined_config = {**transformer_architecture_params, **transformer_optimisation_params}

# Build the model; optimizer and criterion are set up inside TransformerModel
print("Initializing TransformerModel...")
transformer_model = TransformerModel(config=transformer_combined_config)
print(f"TransformerModel initialized on device: {transformer_model.device}")

In [None]:
# Train the Transformer through its own train method, which returns a metrics mapping
print("Training Transformer model...")
transformer_train_metrics = transformer_model.train(
    train_data=(X_train_tensor, y_train_tensor), validation_data=(X_val_tensor, y_val_tensor)
)
print("Transformer training complete.")

# A metric may be absent from the returned mapping. Applying ":.6f" to an
# "N/A" fallback string raises ValueError, so only format values that exist.
for loss_label, loss_key in (
    ("Final Transformer Train Loss", "final_train_loss"),
    ("Final Transformer Val Loss", "final_val_loss"),
    ("Best Transformer Val Loss", "best_val_loss"),
):
    loss_value = transformer_train_metrics.get(loss_key)
    loss_text = "N/A" if loss_value is None else f"{loss_value:.6f}"
    print(f"{loss_label}: {loss_text}")

In [None]:
# Draw whichever loss histories the training metrics contain
fig, ax = plt.subplots(figsize=(10, 6))
for history_key, curve_label in (("train_losses", "Train Loss"), ("val_losses", "Validation Loss")):
    if history_key in transformer_train_metrics:
        ax.plot(transformer_train_metrics[history_key], label=curve_label)
ax.set_title("Transformer Model Training")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
# Only add a legend when at least one curve was drawn
if "train_losses" in transformer_train_metrics or "val_losses" in transformer_train_metrics:
    ax.legend()
ax.grid(True)
plt.show()

## 5. Model Evaluation and Comparison

In [None]:
# --- LSTM: predict on the test split and flatten both sides to 1-D ---
print("Evaluating LSTM model...")
lstm_predictions = lstm_model.predict(X_test_tensor).flatten()
lstm_actuals = y_test_tensor.cpu().numpy().flatten()

# Regression metrics for the LSTM (RMSE is derived from MSE)
lstm_mse = mean_squared_error(lstm_actuals, lstm_predictions)
lstm_rmse = np.sqrt(lstm_mse)
lstm_mae = mean_absolute_error(lstm_actuals, lstm_predictions)
lstm_r2 = r2_score(lstm_actuals, lstm_predictions)
lstm_results = dict(
    predictions=lstm_predictions,
    actuals=lstm_actuals,
    mse=lstm_mse,
    rmse=lstm_rmse,
    mae=lstm_mae,
    r2=lstm_r2,
)

# --- Transformer: same procedure against the same test targets ---
print("Evaluating Transformer model...")
transformer_predictions = transformer_model.predict(X_test_tensor).flatten()
transformer_actuals = y_test_tensor.cpu().numpy().flatten()

# Regression metrics for the Transformer (RMSE is derived from MSE)
transformer_mse = mean_squared_error(transformer_actuals, transformer_predictions)
transformer_rmse = np.sqrt(transformer_mse)
transformer_mae = mean_absolute_error(transformer_actuals, transformer_predictions)
transformer_r2 = r2_score(transformer_actuals, transformer_predictions)
transformer_results = dict(
    predictions=transformer_predictions,
    actuals=transformer_actuals,
    mse=transformer_mse,
    rmse=transformer_rmse,
    mae=transformer_mae,
    r2=transformer_r2,
)

# Print the same four metrics for each model, LSTM first
for report_heading, report_values in (
    ("LSTM Model Metrics:", lstm_results),
    ("\nTransformer Model Metrics:", transformer_results),
):
    print(report_heading)
    print(f"MSE: {report_values['mse']:.6f}")
    print(f"RMSE: {report_values['rmse']:.6f}")
    print(f"MAE: {report_values['mae']:.6f}")
    print(f"R²: {report_values['r2']:.6f}")

In [None]:
# Compare predictions with actual values, one stacked panel per model
fig, axes = plt.subplots(2, 1, figsize=(12, 8))

# Show at most the first 300 test points so the curves stay readable
subset_size = min(300, len(lstm_results["actuals"]))

prediction_panels = (
    (axes[0], lstm_results, "LSTM Predictions", "LSTM Model: Predictions vs Actuals"),
    (axes[1], transformer_results, "Transformer Predictions", "Transformer Model: Predictions vs Actuals"),
)
for panel_ax, panel_results, prediction_label, panel_title in prediction_panels:
    panel_ax.plot(panel_results["actuals"][:subset_size], label="Actual")
    panel_ax.plot(panel_results["predictions"][:subset_size], label=prediction_label)
    panel_ax.set_title(panel_title)
    panel_ax.legend()
    panel_ax.grid(True)

fig.tight_layout()
plt.show()

## 6. Visualize Prediction Errors

In [None]:
# Signed errors (prediction minus actual) for each model
lstm_errors = lstm_results["predictions"] - lstm_results["actuals"]
transformer_errors = transformer_results["predictions"] - transformer_results["actuals"]

# Side-by-side histograms; the dashed red line marks zero error
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
error_panels = (
    (axes[0], lstm_errors, "LSTM Error Distribution"),
    (axes[1], transformer_errors, "Transformer Error Distribution"),
)
for panel_ax, panel_errors, panel_title in error_panels:
    panel_ax.hist(panel_errors, bins=50, alpha=0.7)
    panel_ax.set_title(panel_title)
    panel_ax.axvline(x=0, color="r", linestyle="--")
    panel_ax.grid(True)

fig.tight_layout()
plt.show()

## 7. Hyperparameter Tuning

In [None]:
# Example of simple hyperparameter tuning for LSTM
# In practice, you would use a more systematic approach (e.g., grid search, random search, Bayesian optimization)
# This function now uses the LSTMModel class from btb.models


def tune_lstm_hyperparameters(hidden_dims, num_layers_options, dropout_rates, base_config, train_data, val_data):
    """Train one short-lived LSTMModel per hyperparameter combination.

    Every combination of ``hidden_dims`` x ``num_layers_options`` x
    ``dropout_rates`` is tried. Each trial starts from a copy of
    ``base_config``, overrides the architecture keys, and trains for a fixed
    5 epochs.

    Args:
        hidden_dims: Iterable of hidden sizes to try.
        num_layers_options: Iterable of layer counts to try.
        dropout_rates: Iterable of dropout values to try.
        base_config: Config dict copied for every trial; it is not modified.
        train_data: ``(X, y)`` pair passed to the model's ``train`` method.
            ``X.shape[2]`` is used as the model's ``input_dim``.
        val_data: ``(X, y)`` pair passed as ``validation_data``.

    Returns:
        A ``(results, best_params)`` tuple. ``results`` is a DataFrame with one
        row per trial (``hidden_dim``, ``num_layers``, ``dropout``,
        ``val_loss``). ``best_params`` is the dict of the trial with the lowest
        validation loss, or ``{}`` if no trial beat ``inf``.
    """
    # Settings shared by every trial
    n_features = train_data[0].shape[2]
    n_outputs = 1
    tuning_epochs = 5  # Reduced epochs for tuning

    trial_rows = []
    lowest_val_loss = float("inf")
    winning_params = {}

    for hidden_dim in hidden_dims:
        for num_layers in num_layers_options:
            for dropout in dropout_rates:
                print(f"Tuning LSTM: hidden={hidden_dim}, layers={num_layers}, dropout={dropout}")

                # Per-trial config: base settings plus this trial's overrides
                trial_config = base_config.copy()
                trial_config.update(
                    {
                        "input_dim": n_features,
                        "output_dim": n_outputs,
                        "hidden_dim": hidden_dim,
                        "num_layers": num_layers,
                        "dropout": float(dropout),  # Coerce to float
                        "epochs": tuning_epochs,
                    }
                )

                # Fresh model for this combination, trained via its own method
                candidate = LSTMModel(config=trial_config)
                trial_metrics = candidate.train(train_data=train_data, validation_data=val_data)

                # Prefer best_val_loss, then final_val_loss, then infinity
                fallback_loss = trial_metrics.get("final_val_loss", float("inf"))
                trial_val_loss = trial_metrics.get("best_val_loss", fallback_loss)

                trial_params = {"hidden_dim": hidden_dim, "num_layers": num_layers, "dropout": dropout}
                trial_rows.append({**trial_params, "val_loss": trial_val_loss})

                # Keep the combination with the strictly lowest loss so far
                if trial_val_loss < lowest_val_loss:
                    lowest_val_loss = trial_val_loss
                    winning_params = trial_params
                    print(f"  -> New best val_loss: {lowest_val_loss:.6f}")

    return pd.DataFrame(trial_rows), winning_params

In [None]:
# Define hyperparameter search space
hidden_dims = [64, 128]
num_layers_options = [1, 2]
dropout_rates = [0.1, 0.3]

# Prepare data tuples for tuning function
train_data_tuple = (X_train_tensor, y_train_tensor)
val_data_tuple = (X_val_tensor, y_val_tensor)

# The search trains one model per combination, which can take time, so it is
# opt-in: set RUN_HYPERPARAMETER_TUNING to True to execute it. Using a flag
# instead of commented-out code keeps the call syntax-checked.
RUN_HYPERPARAMETER_TUNING = False

if RUN_HYPERPARAMETER_TUNING:
    tuning_results, best_params = tune_lstm_hyperparameters(
        hidden_dims,
        num_layers_options,
        dropout_rates,
        lstm_combined_config,  # Base LSTM config used for the main training run
        train_data_tuple,
        val_data_tuple,
    )

    print("Hyperparameter tuning results:")
    print(tuning_results.sort_values("val_loss"))
    print("\nBest parameters:")
    print(best_params)

## 8. Trade Signal Generation

In [None]:
def generate_trading_signals(predictions, threshold=0.0):
    """Map predictions to discrete trading signals.

    Args:
        predictions: Array-like of predicted values (NumPy array, list, ...).
            The input is not modified.
        threshold: Values above ``threshold`` become buy signals and values
            below ``-threshold`` become sell signals.

    Returns:
        A NumPy array with the same shape and dtype as ``predictions``
        (after conversion to an array) holding ``1`` (buy), ``-1`` (sell)
        or ``0`` (hold).
    """
    # Coerce first so plain Python sequences support elementwise comparison;
    # an ndarray input passes through unchanged.
    predictions = np.asarray(predictions)
    signals = np.zeros_like(predictions)
    signals[predictions > threshold] = 1  # Buy signal
    # Assigned last, so sell wins if a negative threshold makes both match
    signals[predictions < -threshold] = -1  # Sell signal
    return signals


# Turn each model's test predictions into buy/sell/hold signals using the
# same threshold for both
signal_threshold = 0.001
lstm_signals = generate_trading_signals(lstm_results["predictions"], threshold=signal_threshold)
transformer_signals = generate_trading_signals(transformer_results["predictions"], threshold=signal_threshold)

# Report how many signals of each type every model produced
for signal_heading, model_signals in (
    ("LSTM Signals:", lstm_signals),
    ("\nTransformer Signals:", transformer_signals),
):
    print(signal_heading)
    print(f"Buy signals: {np.sum(model_signals == 1)}")
    print(f"Sell signals: {np.sum(model_signals == -1)}")
    print(f"Hold signals: {np.sum(model_signals == 0)}")

## 9. Save Trained Models

In [None]:
# Make sure the output directory exists (no error if it already does)
models_dir = "../models/"
os.makedirs(models_dir, exist_ok=True)

# Destination file for each trained model
lstm_model_path = os.path.join(models_dir, "lstm_model.pth")
transformer_model_path = os.path.join(models_dir, "transformer_model.pth")

# Persist both models through their own save method, then report the paths
for trained_model, destination_path in (
    (lstm_model, lstm_model_path),
    (transformer_model, transformer_model_path),
):
    trained_model.save(destination_path)

print(f"LSTM model saved to {lstm_model_path}")
print(f"Transformer model saved to {transformer_model_path}")

## 10. Summary and Next Steps

In this notebook, we:

1. Loaded and prepared time series data for ML modeling
2. Configured and trained the LSTM and Transformer models from `btb.models` for price prediction
3. Evaluated and compared model performance using various metrics
4. Explored error distributions and prediction patterns
5. Demonstrated basic hyperparameter tuning
6. Generated trading signals based on model predictions
7. Saved trained models for use in backtesting and live trading

Next steps:
- Use these models in the backtesting framework to evaluate trading performance
- Refine model architectures and hyperparameters for better performance
- Explore ensemble methods for more robust predictions
- Implement advanced training techniques (e.g., focal loss, adversarial training)