In [30]:
import torch
import torch.nn as nn
import time
import random
import numpy as np
from torch import optim
import torch.nn.functional as F
import warnings
warnings.filterwarnings("ignore")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [76]:
LENGTH = 10
N = 10000
LEARNING_RATE = 0.1
BATCH_SIZE=512
INPUT_DIM = 10

In [77]:
device

device(type='cuda')

Создадим последовательности Х и У для тренировочных и тестовых данных и обернем в DataLoader для последующего обучения модели

In [78]:
def create_x_y(length=LENGTH, n=N):
    X = torch.randint(0, 10, size=(n, length), dtype=torch.int64)
    Y = torch.zeros((n, length), dtype=torch.int64)
    for i in range(n):
        for j in range(len(X[i])):
            if j==0:
                Y[i][j] = X[i][j]
            else:
                Y[i][j] = X[i][0] + X[i][j]
                if Y[i][j] >= 10:
                    Y[i][j] -=10
    return X, Y

In [79]:
X, Y = create_x_y()
X_test, Y_test = create_x_y(n=2000)

In [80]:
X[:5]

tensor([[6, 0, 7, 0, 6, 2, 5, 7, 4, 5],
        [1, 8, 0, 5, 7, 1, 4, 4, 2, 8],
        [3, 3, 5, 1, 7, 2, 7, 6, 1, 3],
        [4, 8, 3, 5, 7, 7, 7, 1, 1, 8],
        [5, 8, 5, 6, 2, 1, 5, 6, 1, 1]])

In [81]:
Y[:5]

tensor([[6, 6, 3, 6, 2, 8, 1, 3, 0, 1],
        [1, 9, 1, 6, 8, 2, 5, 5, 3, 9],
        [3, 6, 8, 4, 0, 5, 0, 9, 4, 6],
        [4, 2, 7, 9, 1, 1, 1, 5, 5, 2],
        [5, 3, 0, 1, 7, 6, 0, 1, 6, 6]])

In [82]:
def use_dataloader(x, y):
    dataset = torch.utils.data.TensorDataset(x, y)
    data = torch.utils.data.DataLoader(dataset, BATCH_SIZE)
    return data

In [83]:
train = use_dataloader(X, Y)
test = use_dataloader(X_test, Y_test)

Создадим класс для создания модели нейронной сети и также функцию для обучения модели

In [84]:
class RNNFlex(torch.nn.Module):

    def __init__(self, model, embed_dim, hidden_dim, layer_dim):
        super().__init__()
        self.embed = nn.Embedding(INPUT_DIM, embed_dim)
        self.model = model(embed_dim, hidden_dim, layer_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, INPUT_DIM)

    def forward(self, sentence, state=None):
        embed = self.embed(sentence)
        o, _ = self.model(embed)
        return self.linear(o)

In [85]:
def train_model(model, train, test, NUM_EPOCHS):
    for epoch in range(1, NUM_EPOCHS + 1):
        train_loss, train_accuracy, iter_num = .0, .0, .0
        start_epoch_time = time.time()
        model.train().to(device)
        for x, y in train:
            x = x.to(device)
            y = y.view(1, -1).squeeze().to(device)

            optimizer.zero_grad()

            out = model.forward(x).view(-1, INPUT_DIM)

            loss = loss_fn(out, y)
            train_loss += loss.item()

            batch_accuracy = (out.argmax(dim=1) == y)
            train_accuracy += batch_accuracy.sum().item() / batch_accuracy.shape[0]

            loss.backward()
            optimizer.step()
            iter_num += 1
        if (epoch < 2) | (epoch % 10 == 0):
            print(f"Epoch: {epoch}, loss: {train_loss:.4f}, acc: " f"{train_accuracy / iter_num:.4f}", end=" | ")

        test_loss, test_accuracy, iter_num = .0, .0, .0
        model.eval().to(device)
        for x, y in test:
            x = x.to(device)
            y = y.view(1, -1).squeeze().to(device)

            out = model.forward(x).view(-1, INPUT_DIM)

            loss = loss_fn(out, y)
            test_loss += loss.item()

            batch_accuracy = (out.argmax(dim=1) == y)
            test_accuracy += batch_accuracy.sum().item() / batch_accuracy.shape[0]
            iter_num += 1
        if (epoch < 2) | (epoch % 10 == 0):
            print(f"test loss: {test_loss:.4f}, test acc: {test_accuracy / iter_num:.4f} | " f"{time.time() - start_epoch_time:.2f} sec.")

Обучим обычную реккурентную сейронную сети для генерации последовательности

In [86]:
model = RNNFlex(nn.RNN, 32, 128, 5)
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)

In [87]:
train_model(model, train, test, 200)

Epoch: 1, loss: 46.0711, acc: 0.1079 | test loss: 9.2074, test acc: 0.1097 | 0.28 sec.
Epoch: 10, loss: 45.8462, acc: 0.1833 | test loss: 9.1664, test acc: 0.1824 | 0.20 sec.
Epoch: 20, loss: 45.5591, acc: 0.1902 | test loss: 9.1115, test acc: 0.1872 | 0.27 sec.
Epoch: 30, loss: 45.0232, acc: 0.1904 | test loss: 8.9921, test acc: 0.1875 | 0.19 sec.
Epoch: 40, loss: 41.9403, acc: 0.1988 | test loss: 8.3657, test acc: 0.2008 | 0.35 sec.
Epoch: 50, loss: 39.4847, acc: 0.2241 | test loss: 7.8839, test acc: 0.2249 | 0.20 sec.
Epoch: 60, loss: 37.5792, acc: 0.2474 | test loss: 7.5148, test acc: 0.2488 | 0.20 sec.
Epoch: 70, loss: 35.6968, acc: 0.2755 | test loss: 7.1352, test acc: 0.2728 | 0.19 sec.
Epoch: 80, loss: 33.4851, acc: 0.3502 | test loss: 6.6361, test acc: 0.3490 | 0.20 sec.
Epoch: 90, loss: 27.5818, acc: 0.4581 | test loss: 4.7899, test acc: 0.4913 | 0.20 sec.
Epoch: 100, loss: 14.1062, acc: 0.7257 | test loss: 2.5263, test acc: 0.7694 | 0.20 sec.
Epoch: 110, loss: 1.5594, acc: 1

ЛСТМ

In [90]:
model = RNNFlex(nn.LSTM, 32, 64, 1)
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)

In [91]:
train_model(model, train, test, 350)

Epoch: 1, loss: 46.0897, acc: 0.0995 | test loss: 9.2132, test acc: 0.1020 | 0.33 sec.
Epoch: 10, loss: 45.7925, acc: 0.1906 | test loss: 9.1602, test acc: 0.1879 | 0.16 sec.
Epoch: 20, loss: 45.6152, acc: 0.1906 | test loss: 9.1283, test acc: 0.1874 | 0.16 sec.
Epoch: 30, loss: 45.4998, acc: 0.1905 | test loss: 9.1078, test acc: 0.1873 | 0.38 sec.
Epoch: 40, loss: 45.4179, acc: 0.1903 | test loss: 9.0934, test acc: 0.1869 | 0.16 sec.
Epoch: 50, loss: 45.3534, acc: 0.1903 | test loss: 9.0820, test acc: 0.1868 | 0.16 sec.
Epoch: 60, loss: 45.2949, acc: 0.1904 | test loss: 9.0715, test acc: 0.1870 | 0.16 sec.
Epoch: 70, loss: 45.2323, acc: 0.1903 | test loss: 9.0599, test acc: 0.1872 | 0.16 sec.
Epoch: 80, loss: 45.1545, acc: 0.1904 | test loss: 9.0451, test acc: 0.1872 | 0.22 sec.
Epoch: 90, loss: 45.0453, acc: 0.1905 | test loss: 9.0237, test acc: 0.1875 | 0.16 sec.
Epoch: 100, loss: 44.8757, acc: 0.1908 | test loss: 8.9899, test acc: 0.1876 | 0.16 sec.
Epoch: 110, loss: 44.5864, acc: 

GRU

In [92]:
model = RNNFlex(nn.GRU, 32, 64, 1)
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)

In [93]:
train_model(model, train, test, 250)

Epoch: 1, loss: 46.1049, acc: 0.1077 | test loss: 9.2023, test acc: 0.1170 | 0.16 sec.
Epoch: 10, loss: 45.4903, acc: 0.1899 | test loss: 9.1088, test acc: 0.1869 | 0.15 sec.
Epoch: 20, loss: 45.3738, acc: 0.1901 | test loss: 9.0881, test acc: 0.1870 | 0.15 sec.
Epoch: 30, loss: 45.2977, acc: 0.1901 | test loss: 9.0733, test acc: 0.1870 | 0.15 sec.
Epoch: 40, loss: 45.2096, acc: 0.1901 | test loss: 9.0558, test acc: 0.1870 | 0.17 sec.
Epoch: 50, loss: 45.0831, acc: 0.1901 | test loss: 9.0306, test acc: 0.1870 | 0.23 sec.
Epoch: 60, loss: 44.8779, acc: 0.1902 | test loss: 8.9895, test acc: 0.1871 | 0.17 sec.
Epoch: 70, loss: 44.5254, acc: 0.1912 | test loss: 8.9191, test acc: 0.1877 | 0.16 sec.
Epoch: 80, loss: 43.9617, acc: 0.1950 | test loss: 8.8074, test acc: 0.1903 | 0.16 sec.
Epoch: 90, loss: 43.2087, acc: 0.2007 | test loss: 8.6595, test acc: 0.1965 | 0.16 sec.
Epoch: 100, loss: 42.3738, acc: 0.2086 | test loss: 8.4968, test acc: 0.2056 | 0.16 sec.
Epoch: 110, loss: 41.5499, acc: 

**Вывод:** Были обучены три модели нейронной сети: RNN, LSTM, GRU. Наиболее быстро обучилась простая модель нейронной сети, уже на 120 эпохе точность на тестовых данных составила 1,0. Дольше всего обучалась модель LSTM, точность 1,0 для тестовых данных была достигнута на 320 эпохе. Модель GRU поазала удовлетворительные результаты, точность 1,0 была получена на 220 эпохе