<a href="https://colab.research.google.com/github/fannix/timeseries_generation/blob/master/attention.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import random
import torch

class PeriodicSeriesDataset(torch.utils.data.Dataset):
  def __init__(self, sequence):
    
    self.sequence = sequence
    self.start_symbol = "SOS"
    self.end_symbol = "EOS"
    self.x = []
    self.y = []
    self.id2word = {i+2: w for (i, w) in enumerate(sequence)}
    self.id2word[0] = self.start_symbol
    self.id2word[1] = self.end_symbol
    self.word2id = {w: i for (i, w) in self.id2word.items()}
    seq2id = np.array([self.word2id[w] for w in sequence])
    print(sequence)
    place_holder = np.zeros(len(sequence) + 2, dtype=np.int)
    place_holder[-1] = 1
    for i in range(len(sequence)):
      place_holder[1:-1] = np.roll(seq2id, i)
      self.x.append(place_holder.copy())
      place_holder[1:-1] = np.roll(seq2id, i+1)
      self.y.append(place_holder.copy())
      
  def __len__(self):
    return len(self.id2word) - 2

  def onehot_seq(self, word_seq):
    num_seq = [self.word2id[w] for w in word_seq]
    return self.onehot_num(num_seq)
  
  def onehot_num(self, num_seq):
    y = torch.LongTensor(num_seq).view(-1, 1)
    onehot = torch.FloatTensor(len(num_seq), len(self.word2id))
    onehot.zero_()
    onehot.scatter_(1, y, 1)
    return onehot
  
  def onecold_num(self, tensor):
    dim_n = tensor.shape[0]
    dim_c = tensor.shape[1]
    onecold = tensor.argmax(dim=1)
    return onecold

  def onecold_seq(self, tensor):
    onecold = self.onecold_num(tensor)
    print(onecold)
    return [self.id2word[i.item()] for i in onecold]

  def __getitem__(self, index):
    return  self.x[index], self.y[index]

import string
sequence = list(string.ascii_letters[:6])
pseries = PeriodicSeriesDataset(sequence)
for i in range(len(pseries)):
  print(pseries[i])

onehot = pseries.onehot_seq(['a', 'b', 'c'])
print(onehot)
onecold = pseries.onecold_seq(onehot)
print(onecold)

['a', 'b', 'c', 'd', 'e', 'f']
(array([0, 2, 3, 4, 5, 6, 7, 1]), array([0, 7, 2, 3, 4, 5, 6, 1]))
(array([0, 7, 2, 3, 4, 5, 6, 1]), array([0, 6, 7, 2, 3, 4, 5, 1]))
(array([0, 6, 7, 2, 3, 4, 5, 1]), array([0, 5, 6, 7, 2, 3, 4, 1]))
(array([0, 5, 6, 7, 2, 3, 4, 1]), array([0, 4, 5, 6, 7, 2, 3, 1]))
(array([0, 4, 5, 6, 7, 2, 3, 1]), array([0, 3, 4, 5, 6, 7, 2, 1]))
(array([0, 3, 4, 5, 6, 7, 2, 1]), array([0, 2, 3, 4, 5, 6, 7, 1]))
tensor([[0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0.]])
tensor([2, 3, 4])
['a', 'b', 'c']


In [208]:
batch_size = 5
nb_digits = 10
y = torch.LongTensor(batch_size,1).random_() % nb_digits
y
y_onehot = torch.FloatTensor(batch_size, nb_digits)
y_onehot.zero_()
y_onehot.scatter_(1, y, 1)

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]])

In [0]:
?y_onehot.scatter_

In [0]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [0]:
import torch
from torch import nn

from torch.utils.data import DataLoader


class DecoderRNN(nn.Module):
  def __init__(self, hidden_size, output_size):
    super().__init__()
    self.hidden_size = hidden_size

    self.embedding = nn.Embedding(output_size, hidden_size)
    self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)
    self.out = nn.Linear(hidden_size, output_size)

  def forward(self, input, hc):
    output = self.embedding(input)
    output = torch.relu(output)
    output, (hidden, cell) = self.lstm(output, hc)
    #print(output.shape)
    output = self.out(output)
    return output, (hidden, cell)

  def init_hidden(self, batch_size):
    return (torch.zeros(1, batch_size, self.hidden_size, device=device), 
            torch.zeros(1, batch_size, self.hidden_size, device=device))


class EncoderRNN(nn.Module):
  def __init__(self, input_size, hidden_size):
    super().__init__()
    self.hidden_size = hidden_size

    self.embedding = nn.Embedding(input_size, hidden_size)
    self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)

  def forward(self, input, hc):
    embedded = self.embedding(input)
    output = embedded
    output, (hidden, cell) = self.lstm(output, hc)
    return output, (hidden, cell)

  def init_hidden(self, batch_size):
    return (torch.zeros(1, batch_size, self.hidden_size, device=device),
            torch.zeros(1, batch_size, self.hidden_size, device=device))

class Seq2Seq(nn.Module):
  def __init__(self, input_size, hidden_size):
    super().__init__()
    self.encoder = EncoderRNN(input_size, hidden_size)
    self.decoder = DecoderRNN(hidden_size, input_size)
  
  def forward(self, input, expected = None):
    batch_size = input.shape[0]
    h0, c0 = self.encoder.init_hidden(batch_size)
    encode_output, (encode_hidden, encode_cell) = self.encoder(input, (h0, c0))

    inp = torch.zeros(batch_size, dtype=torch.long)
    inp = inp.view(batch_size, 1)
    h, c = encode_hidden, encode_cell

    #print(inp.shape)
    output_list = []
    while True:
      if expected == None:
        out, (h, c) = self.decoder(inp, (h, c))
      # teacher forcing
      else:
        pass
      output_list.append(out.squeeze(1))
      if len(output_list) == input.shape[1] - 1:
        break
    # print(output_list[0].shape)
    return torch.stack(output_list, 2)

loader = DataLoader(pseries, 4)

model = Seq2Seq(len(pseries.word2id), 20)

criterion = torch.nn.functional.cross_entropy
optimizer = torch.optim.RMSprop(model.parameters())
for epoch in range(100):
  sum_criterion = 0
  n_instance = 0
  for x, y in loader:
    n_instance += x.shape[0]
    optimizer.zero_grad()
    res = model(x)
    loss = criterion(res, y[:, 1:])
    loss.backward()
    sum_criterion += loss.item()
    
    optimizer.step()

  print(f'{epoch}: {sum_criterion/n_instance}')
  #print(res.shape)



In [233]:
res.argmax(dim=1)

tensor([[3, 4, 5, 6, 7, 2, 1],
        [2, 3, 4, 5, 6, 7, 1]])

In [234]:
y

tensor([[0, 3, 4, 5, 6, 7, 2, 1],
        [0, 2, 3, 4, 5, 6, 7, 1]])

In [237]:
model(
    torch.LongTensor(
    [[0, 2, 3, 4, 5, 6, 7, 1],
     [0, 4, 5, 6, 7, 2, 3, 1]
     ])
).argmax(dim=1)

tensor([[7, 2, 3, 4, 5, 6, 1],
        [3, 4, 5, 6, 7, 2, 1]])

In [238]:
model(
    torch.LongTensor(
    [[0, 2, 3, 4, 5, 6, 1],
     [0, 4, 5, 6, 7, 2, 1]
     ])
).argmax(dim=1)

tensor([[6, 7, 2, 3, 4, 5],
        [2, 3, 4, 5, 6, 7]])

In [0]:
model(
    torch.LongTensor(
    [[0, 3, 5, 6, 2, 7, 4, 1],
     [0, 5, 6, 2, 3, 7, 4, 1]
     ])
).argmax(dim=1)

In [172]:
criterion(res, y[:, 1:])

tensor(0.0118, grad_fn=<NllLoss2DBackward>)

In [188]:
y[:, 1:].shape

torch.Size([2, 7])

In [189]:
res.shape

torch.Size([2, 8, 7])

In [202]:
criterion(torch.rand((1, 8, 7)), y[1:, 1:])

tensor(2.1528)

In [204]:
len(pseries)

6

In [179]:
input = torch.randn(4, 8, requires_grad=True)
target = torch.randint(8, (4,), dtype=torch.int64)

torch.nn.functional.cross_entropy(input, target)

tensor(2.0650, grad_fn=<NllLossBackward>)

Attention LSTM Seq2Seq

In [50]:
from torch.utils import data
from random import choice, randrange
class ReverseDataset(data.Dataset):
    """
    Inspired from https://talbaumel.github.io/blog/attention/
    """
    def __init__(self, min_length=5, max_length=20, type='train'):
        self.SOS = "<s>"  
        self.EOS = "</s>" 
        self.characters = list("abcd")
        self.int2char = list(self.characters)
        self.char2int = {c: i+2 for i, c in enumerate(self.characters)}
        self.VOCAB_SIZE = len(self.characters)
        self.min_length = min_length
        self.max_length = max_length
        if type=='train':
            self.set = [self._sample() for _ in range(3000)]
        else:
            self.set = [self._sample() for _ in range(300)]

    def __len__(self):
        return len(self.set)

    def __getitem__(self, item):
        return self.set[item]

    def _sample(self):
        if self.min_length != self.max_length:
            random_length = randrange(self.min_length, self.max_length)# Pick a random length
        else:
            random_length = self.min_length
        random_char_list = [choice(self.characters[:-1]) for _ in range(random_length)]  # Pick random chars
        random_string = ''.join(random_char_list)
        a = np.array([self.char2int.get(x) for x in random_string])
        b = np.array([self.char2int.get(x) for x in random_string[::-1]]) # Return the random string and its reverse
        #x = np.zeros((random_length, self.VOCAB_SIZE))
        #x[np.arange(random_length), a-2] = 1
        return a, b

reverse_dataset = ReverseDataset(4, 4)
reverse_dataset[0]

(array([3, 3, 2, 2]), array([2, 2, 3, 3]))

In [52]:
import torch
from torch import nn
import math

from torch.utils.data import DataLoader

class AttenEncoderRNN(nn.Module):
  def __init__(self, input_size, hidden_size):
    super().__init__()
    self.hidden_size = hidden_size

    self.embedding = nn.Embedding(input_size, hidden_size)
    self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)

  def forward(self, input, hc):
    embedded = self.embedding(input)
    output = embedded
    output, (hidden, cell) = self.lstm(output, hc)
    return output, (hidden, cell)

  def init_hidden(self, batch_size):
    return (torch.zeros(1, batch_size, self.hidden_size, device=device),
            torch.zeros(1, batch_size, self.hidden_size, device=device))

class AttenDecoderRNN(nn.Module):
  def __init__(self, hidden_size, output_size):
    super().__init__()
    self.hidden_size = hidden_size

    self.embedding = nn.Embedding(output_size, hidden_size)
    self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)
    self.out = nn.Linear(hidden_size, output_size)
    self.combine = nn.Linear(2 * hidden_size, hidden_size)

  def forward(self, input, hc, encode_out):
    embed = self.embedding(input)
    attn = attention(embed, encode_out, encode_out)
    comb = self.combine(torch.cat([embed, attn[0]], -1))
    output = torch.relu(comb)
    output, (hidden, cell) = self.lstm(output, hc)
    #print(output.shape)
    output = self.out(output)
    return output, (hidden, cell)

  def init_hidden(self, batch_size):
    return (torch.zeros(1, batch_size, self.hidden_size, device=device), 
            torch.zeros(1, batch_size, self.hidden_size, device=device))

def attention(query, key, value, mask=None, dropout=None):
    "Compute 'Scaled Dot Product Attention'"
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) \
             / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = torch.nn.functional.softmax(scores, dim = -1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn

class AttenSeq2Seq(nn.Module):
  def __init__(self, input_size, hidden_size):
    super().__init__()
    self.encoder = AttenEncoderRNN(input_size, hidden_size)
    self.decoder = AttenDecoderRNN(hidden_size, input_size)
  
  def forward(self, input, expected = None):
    batch_size = input.shape[0]
    h0, c0 = self.encoder.init_hidden(batch_size)
    encode_output, (encode_hidden, encode_cell) = self.encoder(input, (h0, c0))

    inp = torch.zeros(batch_size, dtype=torch.long)
    inp = inp.view(batch_size, 1)
    h, c = encode_hidden, encode_cell
    #print(inp.shape)
    output_list = []
    while True:
      if expected == None:
        out, (h, c) = self.decoder(inp, (h, c), encode_output)
      # teacher forcing
      else:
        pass
      output_list.append(out.squeeze(1))
      if len(output_list) == input.shape[1] - 1:
        break
    # print(output_list[0].shape)
    return torch.stack(output_list, 2)

loader = DataLoader(pseries, 4)

model = AttenSeq2Seq(len(pseries.word2id), 20)

criterion = torch.nn.functional.cross_entropy
optimizer = torch.optim.RMSprop(model.parameters())
for epoch in range(100):
  sum_criterion = 0
  n_instance = 0
  for x, y in loader:
    n_instance += x.shape[0]
    optimizer.zero_grad()
    res = model(x)
    loss = criterion(res, y[:, 1:])
    loss.backward()
    sum_criterion += loss.item()
    
    optimizer.step()

  print(f'{epoch}: {sum_criterion/n_instance}')
  #print(res.shape)



0: 0.7073964277903239
1: 0.6894136269887289
2: 0.6553395390510559
3: 0.6493597229321798
4: 0.6343182524045309
5: 0.6230777104695638
6: 0.5965317090352377
7: 0.5925294756889343
8: 0.6004666487375895
9: 0.5734504461288452
10: 0.5370506048202515
11: 0.5012724796930949
12: 0.4753237764040629
13: 0.45653937260309857
14: 0.5016976594924927
15: 0.4651162425676982
16: 0.4003055691719055
17: 0.40293802817662555
18: 0.37850889563560486
19: 0.29541797439257306
20: 0.26561102271080017
21: 0.23980106910069784
22: 0.2185211032629013
23: 0.18237103521823883
24: 0.1992823084195455
25: 0.17489262421925864
26: 0.19615734120210013
27: 0.2493083874384562
28: 0.13789314031600952
29: 0.10314985116322835
30: 0.0939429799715678
31: 0.08812969426314037
32: 0.07513278971115749
33: 0.06802314519882202
34: 0.06347364435593288
35: 0.05781300365924835
36: 0.05611333747704824
37: 0.05146181335051855
38: 0.04876300940910975
39: 0.0473285677532355
40: 0.043857697397470474
41: 0.047723001490036644
42: 0.042214468121528

In [51]:
reverse_dataset = ReverseDataset(20, 20)

loader = DataLoader(reverse_dataset, 4)

model = AttenSeq2Seq(len(reverse_dataset.char2int) + 2, 20)

criterion = torch.nn.functional.cross_entropy
optimizer = torch.optim.RMSprop(model.parameters())
for epoch in range(100):
  sum_criterion = 0
  n_instance = 0
  for x, y in loader:
    n_instance += x.shape[0]
    optimizer.zero_grad()
    res = model(x)
    loss = criterion(res, y)
    loss.backward()
    sum_criterion += loss.item()
    
    optimizer.step()

  print(f'{epoch}: {sum_criterion/n_instance}')

ValueError: ignored

In [53]:
model(
    torch.LongTensor(
    [[0, 2, 3, 4, 5, 6, 7, 1],
     [0, 4, 5, 6, 7, 2, 3, 1]
     ])
).argmax(dim=1)

tensor([[7, 2, 3, 4, 5, 6, 1],
        [3, 4, 5, 6, 7, 2, 1]])

In [56]:
model(
    torch.LongTensor(
    [[0, 2, 3, 4, 5, 1],
     [0, 3, 4, 5, 6, 1]
     ])
).argmax(dim=1)

tensor([[5, 6, 7, 2, 3],
        [6, 7, 2, 3, 4]])