In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

In [2]:
# Parameters
seq_len = 10  # window size for input sequences
data_columns = ['d1', 'd2', 'd3', 'd4', 'd5', 'd6', 'bonus']

num_epochs = 100    # Epoch 100/100, Loss: 8.0247
#vnum_epochs = 50    # Epoch 50/50, Loss: 10.7304
# num_epochs = 25    # Epoch 25/25, Loss: 14.2781
# num_epochs = 10    #original 10
# learning_rate = 0.01   # Epoch 10/10, Loss: 19.3940
learning_rate = 0.001    # original Epoch 10/10, Loss: 18.2901
# batch size hyperparameter
batch_size = 32
# key hyperparameter that controls the size of the embedding vectors for categorical values
embedding_dim = 64

In [3]:
# Read and preprocess data
file_path = '../data/data_all_l649.xlsx'
df = pd.read_excel(file_path)
df = df.reset_index(drop=True)
df = df.drop(columns=['date'])
input_data = df[data_columns].values.astype(int)
print(f"input data shape: {input_data.shape}")
print(f"input data type: {input_data.dtype}")
print(input_data[:5])  # Display first 5 rows of labels
print('...')
print(input_data[-5:])  # Display last 5 rows of labels
print("Data loaded and preprocessed successfully.")

input data shape: (4341, 7)
input data type: int64
[[ 3 11 12 14 41 43 13]
 [ 8 33 36 37 39 41  9]
 [ 1  6 23 24 27 39 34]
 [ 3  9 10 13 20 43 34]
 [ 5 14 21 31 34 47 45]]
...
[[ 4  9 11 12 42 49 41]
 [ 7 18 23 35 48 49 19]
 [24 29 31 33 36 37 21]
 [ 3 10 12 14 36 41  1]
 [ 5 16 17 21 41 47  6]]
Data loaded and preprocessed successfully.


In [4]:
# Check if CUDA is available and set device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [5]:
# Create input/output sequences based on seq_len size of the window.
# for each target create a sequence of seq_len preceeding inputs
inputs = []
targets = []
for i in range(len(input_data) - seq_len):
    seq = input_data[i:i+seq_len]           # shape: [seq_len, 7]
    tgt = input_data[i+seq_len]             # shape: [7]
    inputs.append(seq)
    targets.append(tgt)

inputs = np.array(inputs)    # shape: [num_samples, seq_len, 7]
targets = np.array(targets)  # shape: [num_samples, 7]
inputs = torch.tensor(inputs, dtype=torch.long).to(device)
targets = torch.tensor(targets, dtype=torch.long).to(device)
print(f"inputs: {inputs.shape}, targets shape: {targets.shape}")
print(inputs[0])  # Display first input sequence
print(targets[0])  # Display first target sequence

inputs: torch.Size([4331, 10, 7]), targets shape: torch.Size([4331, 7])
tensor([[ 3, 11, 12, 14, 41, 43, 13],
        [ 8, 33, 36, 37, 39, 41,  9],
        [ 1,  6, 23, 24, 27, 39, 34],
        [ 3,  9, 10, 13, 20, 43, 34],
        [ 5, 14, 21, 31, 34, 47, 45],
        [ 8, 20, 21, 25, 31, 41, 33],
        [18, 25, 28, 33, 36, 42,  7],
        [ 7, 16, 17, 31, 40, 48, 26],
        [ 5, 10, 23, 27, 37, 38, 33],
        [ 4, 15, 30, 37, 46, 48,  3]], device='cuda:0')
tensor([ 7,  9, 21, 33, 38, 42, 45], device='cuda:0')


In [6]:
class SequenceDataset(Dataset):
    """
    PyTorch Dataset for sequence modeling.
    Each item is a tuple (X, y) where:
      - X: input tensor of shape [seq_len, 7] (sequence of events)
      - y: target tensor of shape [7] (next event to predict)
    Used for batching input/output pairs for transformer models.
    """
    def __init__(self, X, y):
        self.X = X
        self.y = y
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [7]:
# Create a dataset that has corresponding inputs and targets
dataset = SequenceDataset(inputs, targets)
# Create a dataloader for a defined batch size, shuffle is False because order is important
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
print(f"Dataset length: {len(dataset)}")
print(f"First sample: {dataset[0]}")
print(f"Last sample: {dataset[-1]}")

Dataset length: 4331
First sample: (tensor([[ 3, 11, 12, 14, 41, 43, 13],
        [ 8, 33, 36, 37, 39, 41,  9],
        [ 1,  6, 23, 24, 27, 39, 34],
        [ 3,  9, 10, 13, 20, 43, 34],
        [ 5, 14, 21, 31, 34, 47, 45],
        [ 8, 20, 21, 25, 31, 41, 33],
        [18, 25, 28, 33, 36, 42,  7],
        [ 7, 16, 17, 31, 40, 48, 26],
        [ 5, 10, 23, 27, 37, 38, 33],
        [ 4, 15, 30, 37, 46, 48,  3]], device='cuda:0'), tensor([ 7,  9, 21, 33, 38, 42, 45], device='cuda:0'))
Last sample: (tensor([[ 4, 12, 19, 34, 40, 42,  5],
        [ 5,  8, 13, 26, 44, 49, 47],
        [11, 13, 15, 16, 38, 48, 25],
        [ 7, 11, 23, 29, 44, 46, 42],
        [ 1,  4,  5, 10, 37, 46, 24],
        [15, 17, 21, 22, 38, 45, 26],
        [ 4,  9, 11, 12, 42, 49, 41],
        [ 7, 18, 23, 35, 48, 49, 19],
        [24, 29, 31, 33, 36, 37, 21],
        [ 3, 10, 12, 14, 36, 41,  1]], device='cuda:0'), tensor([ 5, 16, 17, 21, 41, 47,  6], device='cuda:0'))


In [8]:
# Define the SimpleTransformer model class
class SimpleTransformer(nn.Module):
    def __init__(self, vocab_size=50, embed_dim=embedding_dim, num_heads=2, num_layers=1, num_outputs=6):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.ModuleList([nn.Linear(embed_dim, vocab_size) for _ in range(num_outputs)])
        self.num_outputs = num_outputs
    def forward(self, x):
        # x shape: [batch_size, seq_len, 7]; used here for clarity and debugging
        batch_size, seq_len, num_features = x.shape
        # Embed each categorical value
        x = self.embedding(x)  # [batch_size, seq_len, 7, embed_dim]
        # Aggregate features for each event (mean or sum)
        x = x.mean(dim=2)      # [batch_size, seq_len, embed_dim]
        x = self.transformer(x)
        x = x[:, -1, :]        # use last token's output
        outs = [fc(x) for fc in self.fc]  # list of [batch_size, vocab_size]
        return outs


In [9]:
# Instantiate the transformer model for training
model = SimpleTransformer(num_outputs=6)  # 6 unique values
model = model.to(device)  # Move model to CUDA if available

# TensorBoard writer uses default directory ./runs
# to view results run tensorboard --logdir runs
writer = SummaryWriter()

# Training loop for the transformer model
# Use CrossEntropyLoss for each output
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch_X, batch_y in dataloader:
        batch_X = batch_X.to(device)
        batch_y = batch_y.to(device)
        optimizer.zero_grad()
        outs = model(batch_X)  # list of 6 outputs, each [batch_size, vocab_size]
        # For each output, compute loss against the corresponding target value
        loss = 0
        for i in range(6):
            loss += criterion(outs[i], batch_y[:, i])
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")
    writer.add_scalar('Loss/train', avg_loss, epoch)

# Log hyperparameters and final loss to TensorBoard
hparams = {
    'seq_len': seq_len,
    'num_epochs': num_epochs,
    'batch_size': batch_size,
    'embedding_dim': embedding_dim,
    'learning_rate': learning_rate,
}
metrics = {
    'final_loss': avg_loss
}
writer.add_hparams(hparams, metrics)

print("Training complete.")
writer.close()


Epoch 1/100, Loss: 20.2424
Epoch 2/100, Loss: 19.5627
Epoch 3/100, Loss: 19.4348
Epoch 4/100, Loss: 19.2322
Epoch 5/100, Loss: 18.9010
Epoch 6/100, Loss: 18.3891
Epoch 7/100, Loss: 17.7762
Epoch 8/100, Loss: 17.0703
Epoch 9/100, Loss: 16.2756
Epoch 10/100, Loss: 15.4877
Epoch 11/100, Loss: 14.6198
Epoch 12/100, Loss: 13.8413
Epoch 13/100, Loss: 13.0380
Epoch 14/100, Loss: 12.3005
Epoch 15/100, Loss: 11.5331
Epoch 16/100, Loss: 10.8823
Epoch 17/100, Loss: 10.2136
Epoch 18/100, Loss: 9.6018
Epoch 19/100, Loss: 9.0635
Epoch 20/100, Loss: 8.6651
Epoch 21/100, Loss: 8.1887
Epoch 22/100, Loss: 7.7520
Epoch 23/100, Loss: 7.4086
Epoch 24/100, Loss: 7.0484
Epoch 25/100, Loss: 6.7314
Epoch 26/100, Loss: 6.4156
Epoch 27/100, Loss: 6.1746
Epoch 28/100, Loss: 5.9950
Epoch 29/100, Loss: 5.7612
Epoch 30/100, Loss: 5.4801
Epoch 31/100, Loss: 5.3728
Epoch 32/100, Loss: 5.1576
Epoch 33/100, Loss: 4.9202
Epoch 34/100, Loss: 4.7980
Epoch 35/100, Loss: 4.6782
Epoch 36/100, Loss: 4.5328
Epoch 37/100, Loss: 

In [10]:
# Prep last sequence of input data for inference
last_seq = torch.tensor(input_data[-seq_len:], dtype=torch.long).unsqueeze(0).to(device)  # shape: [1, seq_len, 7]

print(last_seq)

tensor([[[ 5,  8, 13, 26, 44, 49, 47],
         [11, 13, 15, 16, 38, 48, 25],
         [ 7, 11, 23, 29, 44, 46, 42],
         [ 1,  4,  5, 10, 37, 46, 24],
         [15, 17, 21, 22, 38, 45, 26],
         [ 4,  9, 11, 12, 42, 49, 41],
         [ 7, 18, 23, 35, 48, 49, 19],
         [24, 29, 31, 33, 36, 37, 21],
         [ 3, 10, 12, 14, 36, 41,  1],
         [ 5, 16, 17, 21, 41, 47,  6]]], device='cuda:0')


In [11]:
# Inference for the next event using the last seq_len inputs
# Take the last sequence from the input data
# last_seq = inputs[-1].unsqueeze(0).to(device)  # shape: [1, seq_len, 7], move to CUDA if available

with torch.no_grad():
    # Pass the last sequence to the model
    outs = model(last_seq)  # list of 6 outputs, each [1, vocab_size]
    print("Raw model outputs (logits) for each predicted value:")
    # for i, out in enumerate(outs):
    #     print(f"Output {i+1}:", out[0].cpu().numpy())
    pred_vals = []
    for out in outs:
        # For each output, get the predicted category
        val = torch.argmax(out[0])
        pred_vals.append(val.item())
    # Ensure uniqueness among the 6 predicted values
    unique_pred = []
    for val in pred_vals:
        if val not in unique_pred and len(unique_pred) < 6:
            unique_pred.append(val)
    # If not enough unique, fill with remaining unused values
    if len(unique_pred) < 6:
        unused = set(range(50)) - set(unique_pred)
        unique_pred += list(unused)[:6-len(unique_pred)]
    print(f"Predicted 6 unique values for the next event: {unique_pred}")


Raw model outputs (logits) for each predicted value:
Predicted 6 unique values for the next event: [9, 13, 29, 31, 38, 37]


In [12]:
# Save the trained model and all necessary info for later loading and inference
import os
save_dir = './saved_model'
os.makedirs(save_dir, exist_ok=True)
model_path = os.path.join(save_dir, 'simple_transformer.pth')
info_path = os.path.join(save_dir, 'model_info.pth')

# Save model state dict
torch.save(model.state_dict(), model_path)

# Save hyperparameters and other info needed for loading
model_info = {
    'vocab_size': 50,
    'embed_dim': embedding_dim,
    'num_heads': 2,
    'num_layers': 1,
    'num_outputs': 6,
    'seq_len': seq_len,
    'data_columns': data_columns,
}
torch.save(model_info, info_path)

print(f"Model and info saved to {save_dir}")

Model and info saved to ./saved_model


In [13]:
# Save the SimpleTransformer class definition to a Python file for reuse
class_code = '''import torch.nn as nn\n\nclass SimpleTransformer(nn.Module):\n    def __init__(self, vocab_size=50, embed_dim=32, num_heads=2, num_layers=1, num_outputs=6):\n        super().__init__()\n        self.embedding = nn.Embedding(vocab_size, embed_dim)\n        encoder_layer = nn.TransformerEncoderLayer(d_model=embed_dim, nhead=num_heads, batch_first=True)\n        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)\n        self.fc = nn.ModuleList([nn.Linear(embed_dim, vocab_size) for _ in range(num_outputs)])\n        self.num_outputs = num_outputs\n    def forward(self, x):\n        batch_size, seq_len, num_features = x.shape\n        x = self.embedding(x)  # [batch_size, seq_len, 7, embed_dim]\n        x = x.mean(dim=2)      # [batch_size, seq_len, embed_dim]\n        x = self.transformer(x)\n        x = x[:, -1, :]        # use last token's output\n        outs = [fc(x) for fc in self.fc]  # list of [batch_size, vocab_size]\n        return outs\n'''
with open('./saved_model/simple_transformer.py', 'w') as f:
    f.write(class_code)
print('SimpleTransformer class definition saved to ./saved_model/simple_transformer.py')

SimpleTransformer class definition saved to ./saved_model/simple_transformer.py
