Set up environment

In [None]:
# Ensure src folder is importable
import sys
from pathlib import Path

# Put the directory above the current working directory on sys.path so that
# the `src.*` imports used below can be resolved.
# NOTE(review): assumes the notebook is launched from a direct child of the
# project root (e.g. a notebooks/ folder) — confirm.
project_root = Path.cwd().parent
# Guard the append: re-running this cell must not pile up duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Auto-reload changes in .py files: load IPython's autoreload extension and
# select mode 2, which reloads all modules before each cell is executed, so
# edits to the project sources are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

Imports

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

import numpy as np
import matplotlib.pyplot as plt

from src.data.data_loader import CryptoDataLoader
from src.data.feature_engineering import FeatureEngineer
from src.models.transformer import TransformerModel

Load & prepare data

In [None]:
# Read the previously saved BTC-USD data for the 2022-01-01 .. 2023-01-01 window.
# NOTE(review): "data/raw" is a relative path, so it is resolved against the
# current working directory unless CryptoDataLoader does otherwise — confirm.
symbol = "BTC-USD"
start_date, end_date = "2022-01-01", "2023-01-01"
loader = CryptoDataLoader(data_dir="data/raw")
df = loader.load_saved_data(symbol, start_date, end_date)


In [None]:
# Feature engineering: create the project's FeatureEngineer and let it derive
# the technical-indicator features from the loaded frame. `fe` is reused by
# the normalization / sequence-building step further down.
fe = FeatureEngineer()
df_features = fe.add_technical_indicators(df)


In [None]:
# Normalize and create sequences
sequence_length = 30
normalized_data = fe.normalize_data(df_features)
# The windows must be cut from the normalized frame, not from the raw
# `df_features`; otherwise the normalization above is silently discarded and
# the model is trained on unscaled values. As a consequence the targets in
# `y` (and the model's predictions) are expressed in normalized units.
# NOTE(review): assumes normalize_data returns a frame that still exposes a
# 'Close' column — confirm against FeatureEngineer.
X, y = fe.create_sequences(
    normalized_data,
    sequence_length=sequence_length,
    target_col='Close',
)


In [None]:
# Turn the sequence arrays into float32 tensors; the targets get an extra
# axis inserted at position 1.
float32 = torch.float32
X_tensor = torch.tensor(X, dtype=float32)
y_tensor = torch.tensor(y, dtype=float32)[:, None]


In [None]:
# Sanity check: report the shapes of the input and target tensors.
for caption, tensor in (("Input shape:", X_tensor), ("Target shape:", y_tensor)):
    print(caption, tensor.shape)


In [None]:
# Pair inputs with targets and serve them as shuffled mini-batches.
dataset = TensorDataset(X_tensor, y_tensor)
batch_size = 32
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=batch_size)


Initialize Transformer model

In [None]:
# Hyperparameters handed to the TransformerModel constructor below.
input_size = X_tensor.size(2)  # size of axis 2: number of features
d_model, nhead = 64, 4
num_encoder_layers, dim_feedforward = 2, 128
output_size, dropout = 1, 0.1


In [None]:
# Prefer the GPU when CUDA is available, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)


In [None]:
# Gather the constructor arguments in one mapping, build the network, move it
# to the selected device and print its module summary.
model_config = {
    "input_size": input_size,
    "d_model": d_model,
    "nhead": nhead,
    "num_encoder_layers": num_encoder_layers,
    "dim_feedforward": dim_feedforward,
    "output_size": output_size,
    "dropout": dropout,
}
model = TransformerModel(**model_config).to(device)

print(model)

Loss & optimizer

In [None]:
# Mean-squared-error objective, optimized with Adam at a 1e-3 learning rate.
learning_rate = 1e-3
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

Training loop

In [None]:
num_epochs = 5  # for demo; increase for real training

# Switch the model to training mode before optimizing.
model.train()
for epoch in range(num_epochs):
    # Accumulate the loss weighted by batch size. `criterion` returns the mean
    # over a batch, so averaging those means directly would over-weight a
    # shorter final batch.
    epoch_loss = 0.0
    samples_seen = 0
    for xb, yb in dataloader:
        xb, yb = xb.to(device), yb.to(device)

        optimizer.zero_grad()  # clear gradients left over from the last step
        output = model(xb)
        loss = criterion(output, yb)
        loss.backward()
        optimizer.step()

        batch_len = xb.size(0)
        epoch_loss += loss.item() * batch_len
        samples_seen += batch_len

    # Guard against an empty dataloader instead of raising ZeroDivisionError.
    mean_loss = epoch_loss / samples_seen if samples_seen else float("nan")
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss:.6f}")


Inference and plotting

In [None]:
# Switch to evaluation mode and run the whole input tensor through the model
# with gradient tracking disabled; the result is brought back to the CPU as a
# NumPy array.
model.eval()
with torch.no_grad():
    raw_output = model(X_tensor.to(device))
predictions = raw_output.cpu().numpy()


In [None]:

# Overlay the target series and the model output on a single axes.
fig, ax = plt.subplots(figsize=(12, 5))
for series, caption in ((y, 'Actual Close'), (predictions, 'Predicted Close')):
    ax.plot(series, label=caption)
ax.set_title("BTC-USD Transformer Predictions")
ax.legend()
plt.show()


Save/load model

In [None]:
# Persist only the learned parameters (the state_dict), then read them back
# into the live model as a round-trip check.
checkpoint_path = 'transformer.pth'
torch.save(model.state_dict(), checkpoint_path)
# map_location remaps the stored tensors onto the active device, so a
# checkpoint written on a GPU machine also loads on a CPU-only one.
model.load_state_dict(torch.load(checkpoint_path, map_location=device))