<a href="https://colab.research.google.com/github/jacobstac/forcasting_with_tranformers/blob/main/basic_time_series_transformer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch.nn as nn, torch
from pprint import pprint
import numpy as np

In [3]:
# Toy dataset: each row is [source sequence, target].
# The source is a list of 6 binary tokens; the target is a list of 2
# single-token lists (in these samples, the next two values of the pattern).
# The rows are ragged (length 6 vs. nested length 2), so the array must be
# built with dtype=object; without it NumPy >= 1.24 raises ValueError
# instead of silently creating an object array.
dataset = np.array([
  [[0, 1, 0, 1, 0, 1], [[0], [1]]],
  [[1, 0, 1, 0, 1, 0], [[1], [0]]],
  [[0, 0, 0, 0, 0, 0], [[0], [0]]],
  [[1, 1, 1, 1, 1, 1], [[1], [1]]],
  [[1, 1, 0, 0, 1, 1], [[0], [0]]],
  [[0, 0, 1, 1, 0, 0], [[1], [1]]],
], dtype=object)
print(dataset.shape)  # (number of samples, 2)

# Length of one source sequence (NOT the number of samples).
seq_len = len(dataset[0][0])
print(seq_len)

(6, 2)
6


In [4]:
# Show the first sample as a (source sequence, target) tuple.
dataset[0][0], dataset[0][1]

([0, 1, 0, 1, 0, 1], [[0], [1]])

In [6]:
class TransformerModel(nn.Module):
  """Minimal encoder-decoder transformer over a small token vocabulary.

  Source and target tokens are embedded separately, passed through
  ``nn.Transformer`` with a batch size of 1, and projected to one logit per
  vocabulary entry.

  Args:
    vocab_size: number of distinct token ids (2 for the binary data here).
      Previously the embedding tables were sized by the global ``seq_len``,
      which is the sequence length, not the vocabulary size.
    d_model: embedding / transformer feature size.
    nhead: number of attention heads (must divide ``d_model``).
    verbose: when True (default, matching the original behaviour) every
      forward pass prints the logits, the target embedding and the softmax
      probabilities.

  NOTE(review): ``forward`` feeds ``tgt`` to the decoder unshifted and
  without a causal mask; if the caller also uses ``tgt`` as the label, the
  decoder sees the values it is asked to predict.
  """

  def __init__(self, vocab_size=2, d_model=10, nhead=2, verbose=True):
    super().__init__()
    self.verbose = verbose
    self.embed_src    = nn.Embedding(vocab_size, d_model)  # one row per token id
    self.embed_target = nn.Embedding(vocab_size, d_model)  # one row per token id
    self.transformer = nn.Transformer(d_model, nhead)      # embedding size as first argument
    self.lin = nn.Linear(d_model, vocab_size)              # reduce d_model -> logits per token
    self.softmax = nn.Softmax(dim=-1)                      # for displaying probability

  def forward(self, inp, tgt):
    """Return logits shaped (len(tgt), vocab_size, 1).

    Args:
      inp: integer tensor of source token ids; ``len(inp)`` is the source
        sequence length.
      tgt: integer tensor of target token ids; ``len(tgt)`` is the target
        sequence length.

    Returns:
      The projected transformer output permuted to (seq, classes, batch).
    """
    # nn.Transformer (batch_first=False) expects (seq len, batch size, embedding size).
    src_emb = self.embed_src(inp).view(len(inp), 1, -1)
    tgt_emb = self.embed_target(tgt).view(len(tgt), 1, -1)

    output = self.lin(self.transformer(src_emb, tgt_emb))

    if self.verbose:
      print("output.shape: ", output.shape, "output", output)
      # Use the actual target length instead of a hard-coded 2.
      print("embed_target: ", tgt_emb)
      print("softmax prob: ", self.softmax(output))

    # Move the class dimension to position 1: (seq, 1, classes) -> (seq, classes, 1).
    return output.permute(0, 2, 1)

In [7]:
# Seed before building the model so the random weight initialisation (and
# dropout during training) is reproducible under Restart & Run All.
torch.manual_seed(0)
model = TransformerModel()
# Plain SGD over all model parameters with a fixed learning rate.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [8]:
print(dataset[0][0])

# Convert every [source, target] pair to tensors. Iterate over the dataset
# itself: the previous range(seq_len) only worked because the sequence length
# and the number of samples are both 6.
tensor_dataset = [
  [torch.tensor(source), torch.tensor(target)]
  for source, target in dataset
]

[0, 1, 0, 1, 0, 1]


In [9]:
# Pretty-print the converted [source, target] tensor pairs.
pprint(tensor_dataset)

[[tensor([0, 1, 0, 1, 0, 1]), tensor([[0],
        [1]])],
 [tensor([1, 0, 1, 0, 1, 0]), tensor([[1],
        [0]])],
 [tensor([0, 0, 0, 0, 0, 0]), tensor([[0],
        [0]])],
 [tensor([1, 1, 1, 1, 1, 1]), tensor([[1],
        [1]])],
 [tensor([1, 1, 0, 0, 1, 1]), tensor([[0],
        [0]])],
 [tensor([0, 0, 1, 1, 0, 0]), tensor([[1],
        [1]])]]


In [10]:
# Loss used by the training loop: it is given the model output and the
# integer target tensor of the same sample.
criterion = nn.CrossEntropyLoss()

In [12]:
# Train for a fixed number of single-sample SGD steps, cycling through the
# samples in order.
n_samples = len(tensor_dataset)
for i in range(100):
  # Wrap around by the number of samples (not by seq_len, which only
  # happened to equal the dataset size).
  src_seq, tgt_seq = tensor_dataset[i % n_samples]
  optimizer.zero_grad()
  print(src_seq, tgt_seq)
  # The model output is (seq, classes, batch) and tgt_seq is (seq, batch).
  loss = criterion(model(src_seq, tgt_seq), tgt_seq)
  print("loss: ", loss)
  loss.backward()
  optimizer.step()

tensor([0, 1, 0, 1, 0, 1]) tensor([[0],
        [1]])
output.shape:  torch.Size([2, 1, 2]) output tensor([[[ 2.1208, -2.7316]],

        [[-2.0052,  2.4762]]], grad_fn=<AddBackward0>)
embed_target:  tensor([[[ 0.3170, -1.2659, -1.7323, -0.3029,  0.6377,  0.4199, -0.0482,
          -1.0365, -1.7740, -1.6432]],

        [[-0.0873, -0.6005,  0.8305,  2.0160,  0.6666, -1.5324,  0.1599,
          -0.9006, -0.0457,  0.9033]]], grad_fn=<ViewBackward>)
softmax prob:  tensor([[[0.9923, 0.0077]],

        [[0.0112, 0.9888]]], grad_fn=<SoftmaxBackward>)
loss:  tensor(0.0095, grad_fn=<NllLoss2DBackward>)
tensor([1, 0, 1, 0, 1, 0]) tensor([[1],
        [0]])
output.shape:  torch.Size([2, 1, 2]) output tensor([[[-2.1521,  2.0348]],

        [[ 1.9392, -2.6601]]], grad_fn=<AddBackward0>)
embed_target:  tensor([[[-0.0873, -0.6005,  0.8305,  2.0160,  0.6666, -1.5324,  0.1599,
          -0.9006, -0.0457,  0.9033]],

        [[ 0.3170, -1.2659, -1.7323, -0.3029,  0.6377,  0.4199, -0.0482,
          -1.03