<a href="https://colab.research.google.com/github/davidb1026/route-optimization-transformer/blob/main/vanilla_transformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [19]:
# Create simple sine wave dataset. Label is the next data point

def generate_data(seq_length, num_samples):
    x = np.linspace(0, 100, num_samples)
    data = np.sin(x)
    sequences = []
    for i in range(len(data) - seq_length):
      seq = data[i:i+seq_length]
      label = data[i+seq_length]
      sequences.append((seq, label))
    return sequences

In [20]:
# Set dataset parameters

seq_length = 20
num_samples = 1000
data = generate_data(seq_length, num_samples)

In [21]:
# Split into train and test sets

train_size = int(0.8 * len(data))
train_data = data[:train_size]
test_data = data[train_size:]

In [22]:
# Create PyTorch tensors for train and test data

def to_tensor(data):
  sequences, labels = zip(*data)

  return torch.tensor(sequences, dtype=torch.float32), torch.tensor(labels, dtype=torch.float32)

train_sequences, train_labels = to_tensor(train_data)
test_sequences, test_labels = to_tensor(test_data)

In [23]:
# Define Transformer Model

class TransformerModel(nn.Module):

  def __init__(self, input_dim, model_dim, num_heads, num_layers, dropout=0.1):

    super(TransformerModel, self).__init__()
    self.model_dim = model_dim
    self.input_layer = nn.Linear(input_dim, model_dim)
    self.pos_encoder = nn.Parameter(torch.zeros(1, model_dim))
    self.transformer = nn.Transformer(d_model=model_dim,
                                      nhead=num_heads,
                                      num_encoder_layers=num_layers,
                                      num_decoder_layers=num_layers,
                                      dropout=dropout
                                      )
    self.output_layer = nn.Linear(model_dim, 1)

  def forward(self, x):
    x = self.input_layer(x) * np.sqrt(self.model_dim)
    x += self.pos_encoder
    x = self.transformer(x, x)
    x = self.output_layer(x)

    return x

In [24]:
# Set model parameters

input_dim = 1  # a number
model_dim = 64  # small model
num_heads = 8
num_layers = 3
dropout = 0.1

model = TransformerModel(input_dim, model_dim,
                         num_heads, num_layers, dropout)



In [25]:
# Train the model

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 50
model.train()

for epoch in range(num_epochs):
  optimizer.zero_grad()
  output = model(train_sequences.unsqueeze(-1))
  loss = criterion(output.squeeze(), train_labels)
  loss.backward()
  optimizer.step()

if (epoch + 1) % 10 == 0:
  print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (20) must match the size of tensor b (784) at non-singleton dimension 1

In [None]:
# Evaluate the model

# Evaluation
model.eval()
with torch.no_grad():
  predictions = model(test_sequences.unsqueeze(-1)).squeeze()
  test_loss = criterion(predictions, test_labels)
  print(f'Test Loss: {test_loss.item():.4f}')


In [None]:
# Plot results
import matplotlib.pyplot as plt

plt.plot(test_labels.numpy(), label='True')
plt.plot(predictions.numpy(), label='Predicted')
plt.legend()
plt.show()