In [2]:
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.nn import TransformerEncoderLayer, TransformerEncoder

In [3]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to batch-first sequences.

    A table of shape ``(1, max_len, d_model)`` is computed once and stored in
    the non-trainable buffer ``pe``. Even feature columns hold
    ``sin(position * div_term)`` and odd feature columns hold
    ``cos(position * div_term)``.

    Args:
        d_model: Size of the last (feature) dimension of the inputs. Both
            even and odd values are supported.
        dropout: Dropout probability applied after the table is added.
        max_len: Number of positions stored in the table.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-np.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))

        # Build the table directly in batch-first layout instead of
        # allocating (max_len, 1, d_model) and transposing afterwards.
        pe = torch.zeros(1, max_len, d_model)
        pe[0, :, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even column,
        # so only the first d_model // 2 frequencies have a cosine slot.
        pe[0, :, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add the first ``x.size(1)`` table rows to ``x`` and apply dropout.

        Args:
            x: Tensor of shape ``(batch, seq_len, d_model)``. ``seq_len`` is
                expected not to exceed ``max_len``; longer inputs are not
                supported by the precomputed table.

        Returns:
            Tensor with the same shape as ``x``.
        """
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)
        

In [4]:

class BTCTransformer(nn.Module):
    """Transformer-encoder regressor that emits one value per input sequence.

    Pipeline: linear projection of each time step to ``d_model`` features,
    positional encoding, a stack of batch-first encoder layers, then a small
    MLP head applied to the encoder output at the last time step.

    Args:
        input_features: Number of features per time step in the input.
        d_model: Width of the embedding / encoder.
        nhead: Number of attention heads per encoder layer.
        num_encoder_layers: Number of stacked encoder layers.
        dim_feedforward: Hidden width of each encoder layer's feed-forward part.
        dropout: Dropout probability used in the positional encoding, the
            encoder layers and the head.
        sequence_length: Stored on the instance and used as ``max_len`` of the
            positional-encoding table.
        activation: Activation name passed to the encoder layers.
    """

    def __init__(self, input_features = 5,
                 d_model = 256,
                 nhead = 8,
                 num_encoder_layers = 6,
                 dim_feedforward = 1024,
                 dropout = 0.1,
                 sequence_length = 168,
                 activation = 'gelu'
                 ):
        super().__init__()
        self.sequence_length = sequence_length

        # Per-time-step projection followed by position information.
        self.input_embedding = nn.Linear(input_features, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout, max_len=sequence_length)

        # batch_first=True: tensors flow through as (batch, seq, feature).
        self.transformer_encoder = TransformerEncoder(
            TransformerEncoderLayer(
                d_model=d_model,
                nhead=nhead,
                dim_feedforward=dim_feedforward,
                dropout=dropout,
                activation=activation,
                batch_first=True,
            ),
            num_layers=num_encoder_layers,
        )

        # Regression head: d_model -> 128 -> 64 -> 1.
        head_layers = [
            nn.Linear(d_model, 128),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(128, 64),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(64, 1),
        ]
        self.decoder = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map ``(batch, seq_len, input_features)`` to ``(batch, 1)``."""
        embedded = self.pos_encoder(self.input_embedding(x))
        encoded = self.transformer_encoder(embedded)
        # Only the final time step's representation feeds the head.
        final_step = encoded[:, -1, :]
        return self.decoder(final_step)
    

In [28]:
def preprocess_data(df, sequence_length = 168):
    """Min-max scale the OHLCV columns and cut them into sliding windows.

    Each of the columns 'open', 'high', 'low', 'close' and 'volume' is scaled
    independently to [0, 1] using that column's min and max over the whole of
    ``df``. Window ``i`` holds rows ``i .. i + sequence_length - 1`` and its
    target is the scaled 'close' of row ``i + sequence_length``. Rows are
    addressed by position, so the result does not depend on the index of
    ``df``.

    Note: the scaling statistics come from every row passed in. If the caller
    splits the result into train/validation/test afterwards, the held-out
    rows have contributed to the min/max.

    Args:
        df: DataFrame containing the five feature columns.
        sequence_length: Number of consecutive rows per window (>= 1).

    Returns:
        ``(X, y)`` float32 tensors of shape
        ``(n_windows, sequence_length, 5)`` and ``(n_windows, 1)`` where
        ``n_windows = max(len(df) - sequence_length, 0)``.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
        KeyError: If a feature column is missing from ``df``.
    """
    features = ['open', 'high', 'low', 'close', "volume"]
    if sequence_length < 1:
        raise ValueError(f"sequence_length must be at least 1, got {sequence_length}")

    frame = df[features]
    col_min = frame.min()
    col_range = frame.max() - col_min
    # A constant column has zero range and would turn into NaN (0 / 0);
    # dividing by 1 instead maps it to all zeros.
    col_range = col_range.where(col_range != 0, 1.0)
    normalized = ((frame - col_min) / col_range).to_numpy(dtype=np.float32)

    n_windows = len(normalized) - sequence_length
    if n_windows <= 0:
        # Not enough rows for a single (window, target) pair.
        return (
            torch.empty((0, sequence_length, len(features)), dtype=torch.float32),
            torch.empty((0, 1), dtype=torch.float32),
        )

    # Row positions of every window, shape (n_windows, sequence_length).
    # Gathering with it builds X in one numpy operation instead of handing
    # torch a Python list of arrays.
    window_rows = np.arange(n_windows)[:, None] + np.arange(sequence_length)
    X = normalized[window_rows]

    # Positional slice: the target of window i is row i + sequence_length.
    close_col = features.index('close')
    y = np.ascontiguousarray(normalized[sequence_length:, close_col])

    return torch.from_numpy(X), torch.from_numpy(y).reshape(-1, 1)


In [29]:
# Use Apple's Metal (MPS) backend when this PyTorch build can run on it and
# fall back to the CPU otherwise, so the notebook also runs on other machines.
device = "mps" if torch.backends.mps.is_available() else "cpu"

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt


            
def train_transformer(model, train_data, val_data, 
                     batch_size=32, 
                     epochs=300,  # Increased epochs
                     learning_rate=0.0001,
                     device='mps' if torch.backends.mps.is_available() else 'cpu'):
    """Train ``model`` with AdamW + MSE and return it with its best weights.

    After every epoch the mean validation loss drives a ReduceLROnPlateau
    scheduler (factor 0.8, patience 5). Whenever that loss reaches a new
    minimum, an independent copy of the weights is stored; the copy with the
    lowest validation loss is loaded back into the model before returning.

    Args:
        model: Module whose output for an X batch is compared to the matching
            y batch with ``nn.MSELoss``.
        train_data: ``(X, y)`` pair of tensors used for optimisation.
        val_data: ``(X, y)`` pair of tensors used for validation.
        batch_size: Batch size for both loaders.
        epochs: Number of passes over the training data.
        learning_rate: Initial AdamW learning rate.
        device: Device the model and each batch are moved to.

    Returns:
        ``(model, history)`` where ``history`` is a dict with the lists
        ``'train_losses'`` and ``'val_losses'`` (mean of the batch losses for
        each epoch).

    Raises:
        ValueError: If the training or validation set contains no samples.
    """
    if len(train_data[0]) == 0 or len(val_data[0]) == 0:
        # Without this the per-epoch averages below divide by zero.
        raise ValueError("train_data and val_data must each contain at least one sample")

    print(f"Using device: {device}")
    model = model.to(device)

    train_loader = DataLoader(TensorDataset(train_data[0], train_data[1]), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TensorDataset(val_data[0], val_data[1]), batch_size=batch_size)

    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-5)
    # The scheduler's `verbose` flag is deprecated in recent PyTorch
    # releases; learning-rate reductions are reported explicitly below.
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.8, patience=5)
    criterion = nn.MSELoss()

    train_losses, val_losses = [], []
    best_val_loss = float('inf')
    best_model_state = None

    for epoch in range(epochs):
        model.train()
        train_loss, train_batches = 0, 0
        pbar = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{epochs}')

        for X_batch, y_batch in pbar:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)

            loss.backward()
            # Cap the global gradient norm before the optimiser step.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            train_loss += loss.item()
            train_batches += 1
            pbar.set_postfix({'training_loss': train_loss/train_batches})

        avg_train_loss = train_loss / train_batches
        train_losses.append(avg_train_loss)

        model.eval()
        val_loss, val_batches = 0, 0

        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                y_pred = model(X_batch)
                loss = criterion(y_pred, y_batch)
                val_loss += loss.item()
                val_batches += 1

        avg_val_loss = val_loss / val_batches
        val_losses.append(avg_val_loss)

        lr_before = optimizer.param_groups[0]['lr']
        scheduler.step(avg_val_loss)
        lr_after = optimizer.param_groups[0]['lr']
        if lr_after < lr_before:
            print(f'Epoch {epoch + 1}: reducing learning rate to {lr_after:.4e}')

        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            # state_dict() hands out references to the live tensors, so a
            # plain dict copy would keep changing as training continues.
            # Clone every tensor to freeze this epoch's weights.
            best_model_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }

        print(f'Epoch {epoch + 1}: Train Loss = {avg_train_loss:.6f}, Val Loss = {avg_val_loss:.6f}')

    # Load best model (absent when epochs == 0 or no validation loss ever
    # compared below infinity, e.g. all NaN).
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    return model, {'train_losses': train_losses, 'val_losses': val_losses}

def plot_training_history(history):
    """Plot the training and validation loss curves against the epoch number.

    Args:
        history: Mapping with the sequences 'train_losses' and 'val_losses',
            one value per epoch.
    """
    fig, ax = plt.subplots(figsize=(10, 6))

    curves = (
        ('train_losses', 'Training Loss'),
        ('val_losses', 'Validation Loss'),
    )
    for key, label in curves:
        ax.plot(history[key], label=label)

    ax.set_title('Model Loss Over Time')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.legend()
    ax.grid(True)
    plt.show()


In [None]:
# Location of the input CSV. Set the BTC_DATA_CSV environment variable to run
# the notebook against a file elsewhere; the default is the original local path.
DATA_CSV = os.environ.get(
    'BTC_DATA_CSV',
    '/Users/anurag2506/Desktop/untitled folder/Transformers_BTC_price_predictoin/data_hr.csv',
)
data = pd.read_csv(DATA_CSV)
# read_csv already returns a DataFrame, so re-wrapping it adds nothing. Take
# an explicit copy so `data` keeps the raw load while later cells reshape `df`.
df = data.copy()

In [None]:
# Convert the 'datetime' column to pandas datetime values, replacing the
# column on the existing frame.
df['datetime'] = pd.to_datetime(df['datetime'])

In [None]:
# Use the timestamps as the row index. The guard makes the cell safe to
# re-run once 'datetime' has already been moved out of the columns, and the
# result is reassigned rather than mutating the frame in place.
if 'datetime' in df.columns:
    df = df.set_index('datetime')

In [None]:
# Remove the 'Unnamed: 0' column (presumably a row counter written out with
# the CSV — verify). errors='ignore' keeps the cell re-runnable after the
# column is already gone.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [47]:
# Display the column labels of df.
# NOTE(review): the saved output still lists 'Unnamed: 0' and 'datetime',
# which the preceding cells take out of the columns — it looks like it was
# captured by an earlier run; re-run this cell to refresh it.
df.columns

Index(['Unnamed: 0', 'datetime', 'open', 'high', 'low', 'close', 'volume'], dtype='object')

In [None]:

def main(df):
    """Run the pipeline: window the data, train, plot the losses, save.

    The windows produced by ``preprocess_data`` are split chronologically:
    the first 70% are used for training and the next 15% for validation.
    The remaining windows are not consumed here. The trained weights and the
    loss history are written to 'btc_transformer_model.pth'.

    Args:
        df: DataFrame accepted by ``preprocess_data``.
    """
    X, y = preprocess_data(df, sequence_length=168)

    train_size = int(0.7 * len(X))
    val_size = int(0.15 * len(X))
    train_X, train_y = X[:train_size], y[:train_size]
    val_X, val_y = X[train_size:train_size+val_size], y[train_size:train_size+val_size]
    # X[train_size+val_size:] / y[train_size+val_size:] are left untouched as
    # a held-out tail; nothing in this function evaluates on them.

    model = BTCTransformer()
    # No hard-coded model.to('mps') here: train_transformer moves the model to
    # its own device, which falls back to the CPU when MPS is unavailable.
    trained_model, history = train_transformer(
        model=model,
        train_data=(train_X, train_y),
        val_data=(val_X, val_y),
        batch_size=32,
        epochs=50,
        learning_rate=0.0001
    )

    plot_training_history(history)

    torch.save({
        'model_state_dict': trained_model.state_dict(),
        'training_history': history
    }, 'btc_transformer_model.pth')

In [51]:
# Run the full pipeline defined in main() on the loaded frame.
main(df)



Using device: mps


Epoch 1/100:  22%|██▏       | 181/824 [02:05<07:24,  1.45it/s, training_loss=0.00583]


KeyboardInterrupt: 