Setup environment

In [1]:
# Ensure src folder is importable
import sys
from pathlib import Path

project_root = Path.cwd().parent
sys.path.append(str(project_root))

# Auto-reload changes in .py files
# %load_ext autoreload
# %autoreload 2

Imports

In [2]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

from src.models.train import TimeSeriesTrainer

Initialize trainer

In [3]:
# Choose model type: 'lstm' or 'transformer'
model_type = 'lstm'  # or 'transformer'

trainer = TimeSeriesTrainer(model_type=model_type)
print("Device:", trainer.device)

Device: cuda


Prepare data

In [4]:
# Specify cryptocurrency ticker and sequence length
ticker = 'BTC-USD'
sequence_length = 60

train_loader, val_loader, test_loader, feature_scaler, target_scaler, feature_cols = trainer.prepare_data(
    ticker=ticker,
    sequence_length=sequence_length,
    test_size=0.2,
    val_size=0.2
)

print("Number of features:", len(feature_cols))


Data saved to data/raw\BTC-USD_2024-01-15_2026-01-14.csv
Number of features: 27


  dip[idx] = 100 * (self._dip[idx] / value)
  din[idx] = 100 * (self._din[idx] / value)


Build model

In [5]:
input_size = len(feature_cols)
output_size = 1

# LSTM-specific parameters
model_params = {
    'hidden_size': 64,
    'num_layers': 2,
    'dropout': 0.2,
    'bidirectional': False,
    'use_gru': False
}

# Transformer-specific parameters
transformer_params = {
    'd_model': 64,
    'nhead': 4,
    'num_encoder_layers': 2,
    'dim_feedforward': 128,
    'dropout': 0.1
}

if model_type == 'lstm':
    model = trainer.build_model(input_size=input_size, output_size=output_size, **model_params)
else:
    model = trainer.build_model(input_size=input_size, output_size=output_size, **transformer_params)

print(model)


LSTMModel(
  (rnn): LSTM(27, 64, num_layers=2, batch_first=True, dropout=0.2)
  (dropout): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)


Train model

In [6]:
# Train the model
history = trainer.train(
    train_loader,
    val_loader,
    epochs=50,       # adjust as needed
    lr=0.001,
    patience=5,
    save_path=f'models/best_{model_type}.pth'
)

# Plot training history
trainer.plot_history()

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/50 - 0.56s - train_loss: 0.107378 - val_loss: 0.096852


RuntimeError: Parent directory models does not exist.

Evaluate model

In [None]:
metrics, predictions, actuals = trainer.evaluate(test_loader, target_scaler=target_scaler)

print("Evaluation Metrics:")
for k, v in metrics.items():
    print(f"{k}: {v:.6f}")

# Plot predictions vs actual values
trainer.plot_predictions(actuals, predictions, n=200)


Quick inference on latest data (optional)

In [None]:
# Fetch latest 30-day sequence
import pandas as pd
from src.data.data_loader import CryptoDataLoader
from src.data.feature_engineering import FeatureEngineer

loader = CryptoDataLoader()
engineer = FeatureEngineer()

latest_data = loader.get_latest_data(ticker, days=sequence_length)
latest_features = engineer.add_technical_indicators(latest_data)

# Normalize features
latest_features_scaled = feature_scaler.transform(latest_features[feature_cols])
latest_seq = np.expand_dims(latest_features_scaled, axis=0)  # shape: [1, seq_len, n_features]

latest_seq_tensor = torch.tensor(latest_seq, dtype=torch.float32).to(trainer.device)

trainer.model.eval()
with torch.no_grad():
    prediction = trainer.model(latest_seq_tensor)
    prediction_value = target_scaler.inverse_transform(prediction.cpu().numpy().reshape(-1,1))
    print(f"Predicted next Close for {ticker}: {prediction_value[0,0]:.2f}")
