In [91]:
import torch
import os
import numpy as np
import torchvision as tv
import matplotlib.pyplot as plt
from torchvision import datasets, models, transforms
%matplotlib inline

In [219]:
seq_count = 1000
seq_len = 20
X = np.random.randint(10, size=(seq_count, seq_len), dtype=int)
y = np.zeros((seq_count, seq_len), dtype=int)
X[:10]

array([[0, 1, 6, 1, 0, 3, 5, 0, 1, 9, 4, 4, 9, 8, 1, 7, 3, 3, 4, 2],
       [3, 7, 5, 3, 2, 4, 6, 6, 5, 4, 5, 5, 3, 5, 3, 7, 1, 0, 6, 5],
       [3, 7, 9, 8, 6, 4, 7, 3, 0, 8, 7, 8, 1, 1, 9, 3, 4, 4, 4, 6],
       [5, 8, 0, 1, 9, 8, 6, 9, 8, 1, 5, 2, 3, 5, 8, 6, 4, 7, 0, 8],
       [2, 3, 5, 5, 8, 6, 2, 3, 0, 3, 5, 2, 2, 4, 3, 9, 5, 6, 1, 5],
       [3, 6, 7, 3, 8, 6, 1, 3, 3, 0, 9, 9, 6, 8, 0, 6, 8, 6, 1, 4],
       [5, 3, 7, 0, 1, 0, 4, 0, 3, 9, 8, 2, 6, 3, 9, 6, 1, 0, 6, 7],
       [3, 7, 2, 1, 4, 5, 3, 7, 4, 9, 5, 4, 7, 8, 9, 0, 4, 8, 0, 4],
       [8, 9, 7, 5, 1, 9, 3, 4, 1, 1, 9, 2, 4, 2, 8, 5, 5, 5, 4, 2],
       [9, 2, 9, 7, 0, 8, 8, 2, 6, 6, 5, 7, 7, 0, 5, 7, 4, 3, 3, 4]])

In [220]:
for i in range(seq_count):
  y[i][0] = X[i][0]
  for j in range(1, seq_len):
    num = X[i][j] + X[i][0]
    y[i][j] = num - 10 if num >= 10 else num

X[0:1], y[:1]

(array([[0, 1, 6, 1, 0, 3, 5, 0, 1, 9, 4, 4, 9, 8, 1, 7, 3, 3, 4, 2]]),
 array([[0, 1, 6, 1, 0, 3, 5, 0, 1, 9, 4, 4, 9, 8, 1, 7, 3, 3, 4, 2]]))

In [221]:
y = y[:,-1]
y.shape

(1000,)

In [222]:
X, y = torch.from_numpy(X), torch.from_numpy(y)

In [223]:
batch_size = 100

dataset = torch.utils.data.TensorDataset(X, y)
dataset_loader = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)

In [251]:
class Network(torch.nn.Module):
    def __init__(self, network_type, vocab_size, embed_dim, hidden_dim):
        super().__init__()
        self.embed = torch.nn.Embedding(vocab_size, embed_dim)
        self.rnn = network_type(embed_dim, hidden_dim, batch_first=True)
        self.linear = torch.nn.Linear(hidden_dim, vocab_size)
        
    def forward(self, inp):
      embedding = self.embed(inp)
      out, _ = self.rnn(embedding)
      return self.linear(out)


In [229]:
def train_model(model, loader, loss_fn, optimizer, epochs=10):
  train_losses = []
  for epoch in range(epochs):
    train_loss = 0.0

    model.train()
    for X_batch, y_batch in loader:
        optimizer.zero_grad()
        y_pred = model.forward(X_batch)  
        y_pred = y_pred.view(-1, 200)
        y_batch = y_batch
        loss = loss_fn(y_pred, y_batch)
        train_loss += loss.item()
        loss.backward()
        optimizer.step()

    train_loss /= len(loader)
    train_losses.append(train_loss)
    print(f'Epoch: {epoch}, loss: {train_loss:.3f}')


In [253]:
vocab_size = 10
embed_dim = 64
hidden_dim = 128

model = Network(torch.nn.RNN, vocab_size, embed_dim, hidden_dim)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(list(model.parameters()), lr=0.001)
train_model(model, dataset_loader, loss_fn, optimizer, 100)

Epoch: 0, loss: 5.166
Epoch: 1, loss: 4.181
Epoch: 2, loss: 3.143
Epoch: 3, loss: 2.592
Epoch: 4, loss: 2.397
Epoch: 5, loss: 2.330
Epoch: 6, loss: 2.305
Epoch: 7, loss: 2.293
Epoch: 8, loss: 2.287
Epoch: 9, loss: 2.285
Epoch: 10, loss: 2.282
Epoch: 11, loss: 2.280
Epoch: 12, loss: 2.277
Epoch: 13, loss: 2.276
Epoch: 14, loss: 2.275
Epoch: 15, loss: 2.274
Epoch: 16, loss: 2.273
Epoch: 17, loss: 2.272
Epoch: 18, loss: 2.272
Epoch: 19, loss: 2.272
Epoch: 20, loss: 2.270
Epoch: 21, loss: 2.271
Epoch: 22, loss: 2.270
Epoch: 23, loss: 2.269
Epoch: 24, loss: 2.270
Epoch: 25, loss: 2.270
Epoch: 26, loss: 2.271
Epoch: 27, loss: 2.268
Epoch: 28, loss: 2.270
Epoch: 29, loss: 2.268
Epoch: 30, loss: 2.268
Epoch: 31, loss: 2.269
Epoch: 32, loss: 2.268
Epoch: 33, loss: 2.268
Epoch: 34, loss: 2.268
Epoch: 35, loss: 2.270
Epoch: 36, loss: 2.267
Epoch: 37, loss: 2.268
Epoch: 38, loss: 2.267
Epoch: 39, loss: 2.268
Epoch: 40, loss: 2.268
Epoch: 41, loss: 2.268
Epoch: 42, loss: 2.269
Epoch: 43, loss: 2.26

In [256]:
model = Network(torch.nn.GRU, vocab_size, embed_dim, hidden_dim)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(list(model.parameters()), lr=0.01)
train_model(model, dataset_loader, loss_fn, optimizer, 100)

Epoch: 0, loss: 5.318
Epoch: 1, loss: 5.308
Epoch: 2, loss: 5.298
Epoch: 3, loss: 5.288
Epoch: 4, loss: 5.278
Epoch: 5, loss: 5.268
Epoch: 6, loss: 5.258
Epoch: 7, loss: 5.247
Epoch: 8, loss: 5.236
Epoch: 9, loss: 5.225
Epoch: 10, loss: 5.212
Epoch: 11, loss: 5.199
Epoch: 12, loss: 5.185
Epoch: 13, loss: 5.170
Epoch: 14, loss: 5.154
Epoch: 15, loss: 5.136
Epoch: 16, loss: 5.117
Epoch: 17, loss: 5.095
Epoch: 18, loss: 5.072
Epoch: 19, loss: 5.046
Epoch: 20, loss: 5.017
Epoch: 21, loss: 4.984
Epoch: 22, loss: 4.948
Epoch: 23, loss: 4.908
Epoch: 24, loss: 4.863
Epoch: 25, loss: 4.812
Epoch: 26, loss: 4.755
Epoch: 27, loss: 4.691
Epoch: 28, loss: 4.619
Epoch: 29, loss: 4.539
Epoch: 30, loss: 4.450
Epoch: 31, loss: 4.352
Epoch: 32, loss: 4.246
Epoch: 33, loss: 4.132
Epoch: 34, loss: 4.012
Epoch: 35, loss: 3.889
Epoch: 36, loss: 3.765
Epoch: 37, loss: 3.643
Epoch: 38, loss: 3.526
Epoch: 39, loss: 3.417
Epoch: 40, loss: 3.317
Epoch: 41, loss: 3.227
Epoch: 42, loss: 3.147
Epoch: 43, loss: 3.07

In [258]:
model = Network(torch.nn.LSTM, vocab_size, embed_dim, hidden_dim)
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(list(model.parameters()), lr=0.5)
train_model(model, dataset_loader, loss_fn, optimizer, 100)

Epoch: 0, loss: 5.209
Epoch: 1, loss: 3.874
Epoch: 2, loss: 2.514
Epoch: 3, loss: 2.390
Epoch: 4, loss: 2.349
Epoch: 5, loss: 2.327
Epoch: 6, loss: 2.317
Epoch: 7, loss: 2.307
Epoch: 8, loss: 2.301
Epoch: 9, loss: 2.296
Epoch: 10, loss: 2.294
Epoch: 11, loss: 2.293
Epoch: 12, loss: 2.297
Epoch: 13, loss: 2.291
Epoch: 14, loss: 2.289
Epoch: 15, loss: 2.287
Epoch: 16, loss: 2.286
Epoch: 17, loss: 2.288
Epoch: 18, loss: 2.287
Epoch: 19, loss: 2.285
Epoch: 20, loss: 2.281
Epoch: 21, loss: 2.280
Epoch: 22, loss: 2.283
Epoch: 23, loss: 2.280
Epoch: 24, loss: 2.280
Epoch: 25, loss: 2.282
Epoch: 26, loss: 2.281
Epoch: 27, loss: 2.281
Epoch: 28, loss: 2.278
Epoch: 29, loss: 2.282
Epoch: 30, loss: 2.283
Epoch: 31, loss: 2.280
Epoch: 32, loss: 2.281
Epoch: 33, loss: 2.278
Epoch: 34, loss: 2.282
Epoch: 35, loss: 2.277
Epoch: 36, loss: 2.281
Epoch: 37, loss: 2.276
Epoch: 38, loss: 2.281
Epoch: 39, loss: 2.281
Epoch: 40, loss: 2.283
Epoch: 41, loss: 2.277
Epoch: 42, loss: 2.275
Epoch: 43, loss: 2.28