In [101]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

In [102]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'using device: {device}')

using device: cpu


In [103]:
 class EnhancedTransformer(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, num_heads, dropout):
        super(EnhancedTransformer, self).__init__()
        self.input_proj = nn.Linear(input_size, hidden_size)
        self.pos_encoder = PositionalEncoding(hidden_size, dropout)
        encoder_layers = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=num_heads, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.activation = nn.LeakyReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

        self.init_weights()

    def init_weights(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, x):
        x = self.input_proj(x)
        x = x.unsqueeze(1)  # Add sequence dimension
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        x = x.squeeze(1)
        x = self.fc1(x)
        x = self.activation(x)
        return self.fc2(x)

In [104]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

In [105]:
def generate_data(num_sequences, seq_length):
    sequences = []
    targets = []
    for _ in range(num_sequences):
        seq = torch.randint(1, 101, (seq_length,))  # Random integers between 1 and 100
        target = seq.sum() % 100 + 1  # Sum of sequence modulo 100, then add 1
        sequences.append(seq)
        targets.append(target)
    return torch.stack(sequences), torch.tensor(targets)


In [106]:
def create_X_Y(df):
    X = []
    Y = []
    for row_id in range(len(df) - 1):
        for stock_id in range(1, 51):
            stock_columns = [col for col in df.columns if col.startswith(f'Stock_{stock_id}_')]
            stock_columns.append(f'Stock_{stock_id}')
            x = []
            for column_name in stock_columns:
                x.append(df.iloc[row_id][column_name])
            
            X.append(x)
            Y.append(df.iloc[row_id + 1][f'Stock_{stock_id}'])
        
    
    X = np.array(X, dtype=np.float32)
    Y = np.array(Y, dtype=np.float32)
    return X, Y

In [107]:
df = pd.read_csv('stock_data_with_indicators.csv')
training_org_df = df[49:375]
testing_org_df = df[375:]

X_train, Y_train = create_X_Y(training_org_df)
print(X_train.shape)
print(Y_train.shape)

X_test, Y_test = create_X_Y(testing_org_df)
print(X_test.shape)
print(Y_test.shape)

(16250, 13)
(16250,)
(6200, 13)
(6200,)


In [108]:
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs, patience):
    best_val_loss = float('inf')
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for batch in train_loader:
            inputs, targets = batch
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), targets.float())
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in val_loader:
                inputs, targets = batch
                outputs = model(inputs)
                loss = criterion(outputs.squeeze(), targets.float())
                val_loss += loss.item()

        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        scheduler.step(val_loss)

        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0
            torch.save(model.state_dict(), f'best_transformer_{best_val_loss}.pth')
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print(f'Early stopping after {epoch+1} epochs')
                break


In [109]:
X_train = torch.from_numpy(X_train).float().to(device)
X_val = torch.from_numpy(X_test).float().to(device)
y_train = torch.from_numpy(Y_train).float().to(device)
y_val = torch.from_numpy(Y_test).float().to(device)


# Create DataLoaders
train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)


In [110]:
# Initialize model
input_size = X_train.shape[1]  # Use the actual input size from your data
hidden_size = 512
num_layers = 6
output_size = 1
num_heads = 8
dropout = 0.1

model = EnhancedTransformer(input_size, hidden_size, num_layers, output_size, num_heads, dropout).to(device)

# Training setup
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=0.0001, weight_decay=1e-5)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100, eta_min=1e-6)



In [111]:
# Train the model
train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=100, patience=30)



Epoch 1/100, Train Loss: 131.7175, Val Loss: 7.2959
Epoch 2/100, Train Loss: 15.3511, Val Loss: 7.4493
Epoch 3/100, Train Loss: 15.5928, Val Loss: 4.5094
Epoch 4/100, Train Loss: 15.6664, Val Loss: 6.3537
Epoch 5/100, Train Loss: 12.1193, Val Loss: 12.1095
Epoch 6/100, Train Loss: 12.1105, Val Loss: 12.4185
Epoch 7/100, Train Loss: 12.2485, Val Loss: 2.6555
Epoch 8/100, Train Loss: 11.9049, Val Loss: 2.9350
Epoch 9/100, Train Loss: 11.1403, Val Loss: 2.7159
Epoch 10/100, Train Loss: 11.8879, Val Loss: 16.0398
Epoch 11/100, Train Loss: 10.7527, Val Loss: 4.1029
Epoch 12/100, Train Loss: 12.2218, Val Loss: 8.5204
Epoch 13/100, Train Loss: 9.7154, Val Loss: 8.6508
Epoch 14/100, Train Loss: 10.4753, Val Loss: 6.9471
Epoch 15/100, Train Loss: 9.8447, Val Loss: 3.4138
Epoch 16/100, Train Loss: 11.1907, Val Loss: 2.9764
Epoch 17/100, Train Loss: 11.6451, Val Loss: 6.8739
Epoch 18/100, Train Loss: 9.6926, Val Loss: 1.9507
Epoch 19/100, Train Loss: 9.6041, Val Loss: 6.4024
Epoch 20/100, Train L

KeyboardInterrupt: 

In [84]:
print(f'{Y_test[:3]}')

[13.37 63.58 47.18]


In [85]:
model.eval()
with torch.no_grad():
    test_sequence = torch.tensor(X_test[:3])
    prediction = model(test_sequence)
    print(f'prediction: {prediction}')

prediction: tensor([[15.4798],
        [65.6930],
        [53.4870]])


In [86]:
def save_model(model, path):
    torch.save(model.state_dict(), path)
    print(f"Model saved to {path}")

In [87]:
save_model(model, 'transformer_model.pth')

Model saved to transformer_model.pth


In [88]:
def load_model(model, path, device):
    model.load_state_dict(torch.load(path, map_location=device))
    model.eval()
    return model

In [89]:
input_size = 13  # Adjust based on your input size
hidden_size = 512
num_layers = 6
output_size = 1
num_heads = 8
dropout = 0.1

# Initialize a new model with the same architecture
loaded_model = EnhancedTransformer(input_size, hidden_size, num_layers, output_size, num_heads, dropout).to(device)

# Load the saved weights
loaded_model = load_model(loaded_model, 'transformer_model.pth', device)

# Now you can use loaded_model for inference
loaded_model.eval()
with torch.no_grad():
    test_sequence = torch.tensor(X_test[:3]).to(device)
    prediction = loaded_model(test_sequence)
    print(f'Prediction from loaded model: {prediction}')

Prediction from loaded model: tensor([[15.4798],
        [65.6930],
        [53.4870]])
