In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
import numpy as np
import pandas as pd
import math
from sklearn.preprocessing import StandardScaler
import os

In [2]:
# Run on the GPU when PyTorch reports a usable CUDA device, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cpu


In [20]:
sequence_length = 50  # default number of consecutive rows in one input window


def create_binary_X_Y(df, seq_len=None, n_stocks=50):
    """Build sliding-window samples and next-step up/down labels for every stock.

    For each window start ``row_id`` (stride 1) and each stock ``s`` in
    ``1..n_stocks`` one sample is produced. Samples are ordered by ``row_id``
    first and by stock id second.

    Features of one time step, in order:
      1. every column whose name starts with ``Stock_{s}_`` (in ``df.columns`` order),
      2. the columns ``Stock_1`` ... ``Stock_{n_stocks}``,
      3. the stock id ``s`` itself.

    The label is 1 when ``Stock_{s}`` at the row right after the window is
    strictly greater than at the last row of the window, else 0.

    Rows are addressed by position, so a sliced frame with a non-zero-based
    index works unchanged.

    Args:
        df: DataFrame holding the price and per-stock indicator columns.
        seq_len: Window length; defaults to the module-level ``sequence_length``.
        n_stocks: Number of stocks (columns ``Stock_1`` .. ``Stock_{n_stocks}``).

    Returns:
        ``(X, Y)`` where ``X`` is float64 with shape
        ``(n_windows * n_stocks, seq_len, n_features)`` and ``Y`` is an integer
        array of shape ``(n_windows * n_stocks,)``. When ``df`` is too short for
        a single window both arrays have zero samples but keep their rank.

    Raises:
        ValueError: If ``seq_len`` or ``n_stocks`` is smaller than 1, or if the
            stocks do not all have the same number of ``Stock_{s}_*`` columns.
        KeyError: If one of the ``Stock_{s}`` price columns is missing.
    """
    if seq_len is None:
        seq_len = sequence_length
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")
    if n_stocks < 1:
        raise ValueError(f"n_stocks must be >= 1, got {n_stocks}")

    price_columns = [f'Stock_{stock_id}' for stock_id in range(1, n_stocks + 1)]
    n_rows = len(df)
    n_windows = max(n_rows - seq_len, 0)

    # Extract each stock's feature matrix once; the column selection does not
    # depend on the window position, so it must not be redone per row.
    feature_blocks = []
    for stock_id in range(1, n_stocks + 1):
        indicator_columns = [col for col in df.columns if col.startswith(f'Stock_{stock_id}_')]
        values = df[indicator_columns + price_columns].to_numpy(dtype=np.float64)
        id_column = np.full((n_rows, 1), stock_id, dtype=np.float64)
        feature_blocks.append(np.hstack([values, id_column]))

    n_features = feature_blocks[0].shape[1]
    if any(block.shape[1] != n_features for block in feature_blocks):
        raise ValueError("All stocks must have the same number of 'Stock_<id>_*' columns")

    X = np.empty((n_windows * n_stocks, seq_len, n_features), dtype=np.float64)
    for row_id in range(n_windows):
        for offset, block in enumerate(feature_blocks):
            X[row_id * n_stocks + offset] = block[row_id:row_id + seq_len]

    if n_windows:
        prices = df[price_columns].to_numpy(dtype=np.float64)
        # Row i of price_change is price[seq_len + i] - price[seq_len + i - 1];
        # flattening row-major gives the (row_id, stock_id) order used for X.
        price_change = prices[seq_len:] - prices[seq_len - 1:-1]
        Y = (price_change > 0).astype(int).reshape(-1)
    else:
        Y = np.zeros(0, dtype=int)

    return X, Y

In [64]:
def save_binary_data(X, Y, filename_prefix):
    """Write ``X`` and ``Y`` to ``<filename_prefix>_X.npy`` / ``<filename_prefix>_Y.npy``.

    Args:
        X: Array of input samples.
        Y: Array of labels.
        filename_prefix: Path prefix shared by both output files.
    """
    x_path = f"{filename_prefix}_X.npy"
    y_path = f"{filename_prefix}_Y.npy"
    for path, array in ((x_path, X), (y_path, Y)):
        np.save(path, array)
    print(f"Data saved as {filename_prefix}_X.npy and {filename_prefix}_Y.npy")

def load_binary_data(filename_prefix):
    """Read the arrays stored in ``<filename_prefix>_X.npy`` and ``<filename_prefix>_Y.npy``.

    Args:
        filename_prefix: Path prefix shared by both input files.

    Returns:
        Tuple ``(X, Y)`` with the two loaded arrays.
    """
    x_path = f"{filename_prefix}_X.npy"
    y_path = f"{filename_prefix}_Y.npy"
    X, Y = (np.load(path) for path in (x_path, y_path))
    print(f"Data loaded from {filename_prefix}_X.npy and {filename_prefix}_Y.npy")
    return X, Y

In [88]:
# Reuse the cached arrays only when ALL four files are present. Checking just
# the train files would crash in load_binary_data("test") on a partial cache
# instead of rebuilding it.
# NOTE(review): the cache is never invalidated; delete the .npy files after
# changing create_binary_X_Y or the CSV, otherwise stale arrays are loaded.
cache_files = [
    f"{prefix}_{part}.npy"
    for prefix in ("train", "test")
    for part in ("X", "Y")
]
if all(os.path.exists(path) for path in cache_files):
    X_train, Y_train = load_binary_data("train")
    X_test, Y_test = load_binary_data("test")
else:
    df = pd.read_csv('stock_data_with_indicators.csv')
    # Chronological split by row position: rows 21..374 train, rows 375.. test.
    # NOTE(review): presumably the first 21 rows are skipped because the
    # indicators are not yet defined there - confirm against the CSV.
    training_org_df = df[21:375]
    testing_org_df = df[375:]
    X_train, Y_train = create_binary_X_Y(training_org_df)
    X_test, Y_test = create_binary_X_Y(testing_org_df)

    # Save the processed data
    save_binary_data(X_train, Y_train, "train")
    save_binary_data(X_test, Y_test, "test")

Data loaded from train_X.npy and train_Y.npy
Data loaded from test_X.npy and test_Y.npy


In [89]:
# Standardise the features. The scaler is fitted on the training windows only
# and then applied unchanged to the test windows.
scaler = StandardScaler()

# StandardScaler works on 2-D input, so every time step of every window
# becomes one row; the original 3-D shape is restored after scaling.
X_train_reshaped = X_train.reshape(-1, X_train.shape[-1])
X_test_reshaped = X_test.reshape(-1, X_test.shape[-1])

scaler.fit(X_train_reshaped)
X_train_scaled = scaler.transform(X_train_reshaped).reshape(X_train.shape)
X_test_scaled = scaler.transform(X_test_reshaped).reshape(X_test.shape)

print(f'X_train.shape: {X_train_scaled.shape}')
print(f'Y_train.shape: {Y_train.shape}')
print(f'X_test.shape: {X_test_scaled.shape}')
print(f'Y_test.shape: {Y_test.shape}')

X_train.shape: (15200, 50, 63)
Y_train.shape: (15200, 1)
X_test.shape: (3750, 50, 63)
Y_test.shape: (3750, 1)


In [104]:
class StockPriceTransformer(nn.Module):
    """Transformer-encoder binary classifier over batch-first feature sequences.

    The input is projected to ``d_model``, position-encoded and passed through a
    stack of encoder layers. A second linear projection of the raw input is added
    to the encoder output as a skip path, the result is averaged over the time
    axis and fed to a two-layer head that ends in a sigmoid.

    Args:
        input_dim: Number of features per time step.
        d_model: Width of the encoder.
        nhead: Number of attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        dim_feedforward: Hidden width of each encoder layer's feed-forward part.
        dropout: Dropout probability used in the encoder, the positional
            encoding and the classification head.
    """

    def __init__(self, input_dim, d_model, nhead, num_layers, dim_feedforward, dropout=0.1):
        super().__init__()
        # Submodules are created in a fixed order so that parameter names and
        # the sequence of random initialisations stay stable.
        self.embedding = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(layer, num_layers=num_layers)
        self.fc1 = nn.Linear(d_model, 64)
        self.fc2 = nn.Linear(64, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

        # Skip path: maps the raw input straight to d_model.
        self.residual_fc = nn.Linear(input_dim, d_model)

    def forward(self, src):
        """Return one probability per sequence.

        Args:
            src: Tensor of shape ``(batch, seq_len, input_dim)``.

        Returns:
            Tensor of shape ``(batch, 1)`` with values in ``[0, 1]``.
        """
        skip = self.residual_fc(src)

        encoded = self.transformer_encoder(self.pos_encoder(self.embedding(src)))

        # Merge the skip path, then average over the time axis.
        pooled = (encoded + skip).mean(dim=1)

        hidden = self.dropout(self.relu(self.fc1(pooled)))
        return torch.sigmoid(self.fc2(hidden))

class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a batch-first sequence, then apply dropout.

    Even feature indices hold ``sin(pos * w_k)`` and odd indices ``cos(pos * w_k)``
    with ``w_k = 10000 ** (-2k / d_model)``. The table is stored in the
    non-trainable buffer ``pe`` of shape ``(max_len, 1, d_model)``.

    Args:
        d_model: Feature width of the sequences; may be odd.
        dropout: Dropout probability applied after adding the encoding.
        max_len: Longest sequence length the table supports.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model))
        angles = position * div_term  # (max_len, ceil(d_model / 2))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(angles)
        # With an odd d_model there is one cosine column fewer than sine
        # columns, so the angle table is trimmed to avoid a shape mismatch.
        pe[:, 0, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``dropout(x + encoding)``.

        Args:
            x: Tensor of shape ``(batch, seq_len, d_model)``.

        Raises:
            ValueError: If ``seq_len`` exceeds the ``max_len`` the table was built for.
        """
        seq_len = x.size(1)
        if seq_len > self.pe.size(0):
            raise ValueError(
                f"Sequence length {seq_len} exceeds the positional table size {self.pe.size(0)}"
            )
        # (seq_len, 1, d_model) -> (1, seq_len, d_model) so it broadcasts over the batch.
        x = x + self.pe[:seq_len].transpose(0, 1)
        return self.dropout(x)


In [105]:
# Hyperparameters
batch_size = 64
learning_rate = 0.001
num_epochs = 100
d_model = 512          # must be divisible by nhead
nhead = 16
num_layers = 8
dim_feedforward = 512
validation_split = 0.2
seed = 42              # fixes the split, the weight initialisation and the shuffling order

torch.manual_seed(seed)

print(X_train.shape)
print(Y_train.shape)

# Prepare data
# torch.as_tensor accepts NumPy arrays as well as tensors, so this cell can be
# re-run after X_train / Y_train have already been replaced by tensors.
# Labels are forced to shape (N, 1) to match the model's (batch, 1) output;
# the previous `.squeeze(0)` left the shape untouched for N > 1 and would have
# removed the sample axis for N == 1.
X_train = torch.as_tensor(X_train_scaled, dtype=torch.float32).to(device)
Y_train = torch.as_tensor(Y_train, dtype=torch.float32).reshape(-1, 1).to(device)
X_test = torch.as_tensor(X_test_scaled, dtype=torch.float32).to(device)
Y_test = torch.as_tensor(Y_test, dtype=torch.float32).reshape(-1, 1).to(device)

print(X_train.shape)
print(Y_train.shape)

dataset = TensorDataset(X_train, Y_train)
train_size = int((1 - validation_split) * len(dataset))
val_size = len(dataset) - train_size
# NOTE(review): the samples are stride-1 sliding windows, so a random split
# places heavily overlapping windows on both sides and the validation loss is
# likely optimistic. Consider a chronological split with a gap instead.
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(seed),
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(TensorDataset(X_test, Y_test), batch_size=batch_size)

input_dim = X_train.shape[2]
model = StockPriceTransformer(input_dim, d_model, nhead, num_layers, dim_feedforward).to(device)

# The model already applies a sigmoid, hence BCELoss rather than BCEWithLogitsLoss.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
# Halve the learning rate after 5 epochs without validation-loss improvement.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

torch.Size([15200, 50, 63])
torch.Size([15200, 1])
torch.Size([15200, 50, 63])
torch.Size([15200, 1])


In [106]:
def count_parameters(model):
    """Return the total number of elements in the parameters of ``model`` that require gradients."""
    trainable = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable += param.numel()
    return trainable

# Count the trainable parameters of the `model` instance and report the total
# with thousands separators.
total_params = count_parameters(model)
print(f"The model has {total_params:,} trainable parameters")

The model has 12,722,305 trainable parameters


In [107]:

# Training loop
# Predictions and labels are flattened to 1-D in BOTH passes, so the loop works
# for labels shaped (N, 1) as well as (N,). Losses are weighted by batch size
# so the epoch average is a true per-sample mean even when the last batch is
# short (criterion is assumed to use the default 'mean' reduction).
for epoch in range(num_epochs):
    model.train()
    total_train_loss = 0.0
    train_samples = 0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        outputs = model(batch_X).reshape(-1)
        batch_y = batch_y.reshape(-1)
        loss = criterion(outputs, batch_y)
        loss.backward()
        # Cap the global gradient norm before the optimiser step.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        total_train_loss += loss.item() * batch_y.size(0)
        train_samples += batch_y.size(0)

    model.eval()
    total_val_loss = 0.0
    val_samples = 0
    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            outputs = model(batch_X).reshape(-1)
            batch_y = batch_y.reshape(-1)
            val_loss = criterion(outputs, batch_y)
            total_val_loss += val_loss.item() * batch_y.size(0)
            val_samples += batch_y.size(0)

    avg_train_loss = total_train_loss / max(train_samples, 1)
    avg_val_loss = total_val_loss / max(val_samples, 1)

    # ReduceLROnPlateau needs the monitored metric.
    scheduler.step(avg_val_loss)

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')

Epoch [1/100], Train Loss: 0.6960, Val Loss: 0.6905
Epoch [2/100], Train Loss: 0.6923, Val Loss: 0.6907
Epoch [3/100], Train Loss: 0.6923, Val Loss: 0.6918


KeyboardInterrupt: 

In [None]:

# Final evaluation
# Predictions and labels are flattened to 1-D before they are compared. With
# mismatched shapes such as (N, 1) vs (N,), `==` would broadcast to an N x N
# matrix and report a meaningless accuracy.
model.eval()
total_test_loss = 0.0
prediction_batches = []
label_batches = []

with torch.no_grad():
    for batch_X, batch_y in test_loader:
        outputs = model(batch_X).reshape(-1)
        batch_y = batch_y.reshape(-1)
        test_loss = criterion(outputs, batch_y)
        # Weight by batch size so a short final batch is not over-counted
        # (criterion is assumed to use the default 'mean' reduction).
        total_test_loss += test_loss.item() * batch_y.size(0)
        predictions = (outputs > 0.5).float()
        prediction_batches.append(predictions.cpu().numpy())
        label_batches.append(batch_y.cpu().numpy())

all_predictions = np.concatenate(prediction_batches)
all_labels = np.concatenate(label_batches)
assert all_predictions.shape == all_labels.shape, "prediction/label shape mismatch"

avg_test_loss = total_test_loss / max(len(all_labels), 1)
accuracy = np.mean(all_predictions == all_labels)

print(f'Test Loss: {avg_test_loss:.4f}')
print(f'Test Accuracy: {accuracy:.4f}')