In [2]:
import torch as tr
import copy
import time
import torch
from torch import nn
import re
import random
import tqdm
import pandas as pd
import os
import sys

# Use the GPU when torch can see one, otherwise stay on the CPU.
if tr.cuda.is_available():
    dev = tr.device('cuda')
else:
    dev = tr.device('cpu')
dev

device(type='cuda')

In [3]:
def f(x, n_classes=10):
  """Build the target for a batch of token sequences.

  Every element of a row is shifted by that row's first element and wrapped
  modulo ``n_classes``.

  Args:
    x: 2-D integer array/tensor indexed as ``[row, position]`` (the slice
      ``x[:, 0:1]`` keeps the first column as a broadcastable column).
    n_classes: modulus applied to the sum; defaults to 10, the value that
      was previously hard-coded.

  Returns:
    An object of the same shape as ``x`` holding ``(x + x[:, 0:1]) % n_classes``.
  """
  return (x + x[:, 0:1]) % n_classes

In [12]:
class RNN(tr.nn.Module):
    """Per-position classifier: embedding -> recurrent stack -> linear head.

    Args:
        n_classes: vocabulary size of the embedding and number of output logits.
        embedding_size: width of the token embedding fed to the recurrent layer.
        hidden_size: width of the recurrent hidden state.
        num_layers: number of stacked recurrent layers.
        rnn_type: recurrent layer class, called as
            ``rnn_type(embedding_size, hidden_size, num_layers, batch_first=True)``.
    """

    def __init__(self, n_classes, embedding_size, hidden_size, num_layers, rnn_type):
        super().__init__()
        self.embedding = tr.nn.Embedding(n_classes, embedding_size)
        self.rnn = rnn_type(embedding_size, hidden_size, num_layers, batch_first = True)
        self.linear = tr.nn.Linear(hidden_size, n_classes)

    def forward(self, input):
        """Return logits of shape ``(batch, seq, n_classes)`` for integer tokens ``input``.

        ``input`` is expected to be a ``(batch, seq)`` tensor of token ids.
        """
        r = self.embedding(input)
        # The recurrent layer returns (outputs, state); only the per-step outputs are used.
        r = self.rnn(r)[0]
        # No squeeze here: a bare ``squeeze()`` would silently drop the batch or
        # time axis whenever it has size 1 and change the output rank.
        return self.linear(r)

In [32]:
# Recurrent layer classes that every experiment iterates over, in display order.
rnn_list = [
    tr.nn.RNN,
    tr.nn.LSTM,
    tr.nn.GRU,
]

In [33]:
def _model_kwargs(base_model, conf):
    """Return the entries of ``conf`` that ``base_model`` accepts as keyword arguments.

    ``conf`` also carries training settings (iteration count, batch size, ...),
    so forwarding it wholesale raises ``TypeError`` for a constructor with a
    fixed parameter list. ``rnn_type`` is always dropped because ``proc``
    passes it explicitly.
    """
    import inspect  # local import keeps this cell self-contained

    forwarded = {k: v for k, v in conf.items() if k != 'rnn_type'}
    try:
        params = inspect.signature(base_model).parameters
    except (TypeError, ValueError):
        # Signature not introspectable: fall back to forwarding everything.
        return forwarded
    if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()):
        # Constructor takes **kwargs, so it can receive the whole config.
        return forwarded
    return {k: v for k, v in forwarded.items() if k in params}


def proc(key_, list_, df_, base_model, conf):
    """Train and evaluate one model per (swept value, recurrent type) pair.

    For every value in ``list_`` the config entry ``key_`` is overridden, and
    for every recurrent layer class a fresh ``base_model`` is trained for
    ``conf['n_iters']`` steps on random batches whose targets come from ``f``.
    The model is then scored on one more random batch.

    Args:
        key_: name of the config entry to sweep (e.g. ``'seq_len'``).
        list_: values assigned to ``conf[key_]``, one run per value.
        df_: unused; kept so existing callers keep working.
        base_model: model class, called with the config entries it accepts
            plus ``rnn_type=...``.
        conf: dict with at least ``loss_best``, ``learn_rate``, ``n_iters``,
            ``n_classes``, ``batch_size`` and ``seq_len``. Optional ``dev`` and
            ``rnn_list`` override the notebook-level ``dev`` / ``rnn_list``.
            The dict is not modified.

    Yields:
        A new dict per run with keys ``type_`` (recurrent class name), ``i_``
        (swept value), ``score_`` (accuracy on the evaluation batch, rounded to
        5 digits), ``loss_best`` (lowest training loss seen so far in this
        call, as a float) and ``model_best`` (independent copy of the model
        taken when that loss was observed).
    """
    # Prefer the values carried in conf; fall back to the notebook globals.
    device = conf['dev'] if 'dev' in conf else dev
    cell_types = conf['rnn_list'] if 'rnn_list' in conf else rnn_list

    res = dict(
        loss_best=conf['loss_best'],
        model_best=None,
        type_=None,
        i_=0,
        score_=0
    )

    for i in list_:
        # Work on a per-run copy so the caller's conf is never mutated.
        run_conf = {**conf, key_: i}
        n_classes = run_conf['n_classes']
        batch_shape = (run_conf['batch_size'], run_conf['seq_len'])

        for rnn_type in cell_types:
            model = base_model(**_model_kwargs(base_model, run_conf), rnn_type=rnn_type).to(device)

            opt = tr.optim.Adam(model.parameters(), lr=run_conf['learn_rate'])
            crit = tr.nn.CrossEntropyLoss()

            model.train()
            for _ in range(run_conf['n_iters']):
                x = tr.randint(n_classes, batch_shape, device=device)
                y = f(x)
                opt.zero_grad()
                pred = model(x)
                loss = crit(pred.reshape(-1, n_classes), y.flatten())
                # Compare and store a plain float so no autograd graph is kept alive.
                loss_value = loss.item()
                if loss_value < res['loss_best']:
                    # Deep copy: a shallow copy would share parameters with the
                    # model that keeps training and so would not be a snapshot.
                    res['model_best'] = copy.deepcopy(model)
                    res['loss_best'] = loss_value
                loss.backward()
                opt.step()

            model.eval()
            with tr.no_grad():  # evaluation needs no gradients
                x = tr.randint(n_classes, batch_shape, device=device)
                y = f(x)
                pred = model(x).argmax(dim=-1).reshape(y.shape)
                score = (y == pred).sum().item() / y.numel()

            res['type_'] = rnn_type.__name__
            res['i_'] = i
            res['score_'] = round(score, 5)

            print(f"TYPE::{res['type_']} I::{res['i_']} SCORE::{res['score_']}")

            # Yield a copy so collected results are not overwritten by later runs.
            yield dict(res)

Влияние длины входной последовательности (seq_len)

In [36]:
# Baseline settings for this experiment; proc overrides only the swept key.
conf = {
    'n_iters': 1000,
    'batch_size': 100,
    'seq_len': 100,
    'loss_best': 10**10,
    'learn_rate': 0.0015,
    'rnn_list': rnn_list,
    'n_classes': 10,
    'embedding_size': 10,
    'hidden_size': 50,
    'num_layers': 1,
    'dev': dev,
}

# Sequence lengths to try.
list_ = [20, 50, 100, 125, 150, 200]

# Result table: one row per recurrent layer class, one column per swept value.
df = pd.DataFrame(
    index=[cell.__name__ for cell in rnn_list],
    columns=list_,
)

for res in proc(key_='seq_len', list_=list_, df_=df, base_model=RNN, conf=conf):
    # Each yielded result names its own row (type_) and column (i_).
    df.loc[res['type_'], res['i_']] = res['score_']

df

TYPE::RNN I::20 SCORE::0.2515
TYPE::LSTM I::20 SCORE::1.0
TYPE::GRU I::20 SCORE::1.0
TYPE::RNN I::50 SCORE::0.1208
TYPE::LSTM I::50 SCORE::0.5812
TYPE::GRU I::50 SCORE::1.0
TYPE::RNN I::100 SCORE::0.087
TYPE::LSTM I::100 SCORE::0.5007
TYPE::GRU I::100 SCORE::0.7155
TYPE::RNN I::125 SCORE::0.10824
TYPE::LSTM I::125 SCORE::0.24152
TYPE::GRU I::125 SCORE::0.70288
TYPE::RNN I::150 SCORE::0.11207
TYPE::LSTM I::150 SCORE::0.1102
TYPE::GRU I::150 SCORE::0.49027
TYPE::RNN I::200 SCORE::0.10595
TYPE::LSTM I::200 SCORE::0.1981
TYPE::GRU I::200 SCORE::0.25795


Unnamed: 0,20,50,100,125,150,200
RNN,0.2515,0.1208,0.087,0.10824,0.11207,0.10595
LSTM,1.0,0.5812,0.5007,0.24152,0.1102,0.1981
GRU,1.0,1.0,0.7155,0.70288,0.49027,0.25795


Влияние количества рекуррентных слоёв (num_layers)

In [37]:
# Baseline settings for this experiment; proc overrides only the swept key.
conf = {
    'n_iters': 1000,
    'batch_size': 100,
    'seq_len': 100,
    'loss_best': 10**10,
    'learn_rate': 0.0015,
    'rnn_list': rnn_list,
    'n_classes': 10,
    'embedding_size': 10,
    'hidden_size': 50,
    'num_layers': 1,
    'dev': dev,
}

# Numbers of stacked recurrent layers to try.
list_ = [1, 4, 6, 9]

# Result table: one row per recurrent layer class, one column per swept value.
df = pd.DataFrame(
    index=[cell.__name__ for cell in rnn_list],
    columns=list_,
)

for res in proc(key_='num_layers', list_=list_, df_=df, base_model=RNN, conf=conf):
    # Each yielded result names its own row (type_) and column (i_).
    df.loc[res['type_'], res['i_']] = res['score_']

df

TYPE::RNN I::1 SCORE::0.1037
TYPE::LSTM I::1 SCORE::0.4942
TYPE::GRU I::1 SCORE::0.9996
TYPE::RNN I::4 SCORE::0.3035
TYPE::LSTM I::4 SCORE::0.5015
TYPE::GRU I::4 SCORE::0.9958
TYPE::RNN I::6 SCORE::0.3244
TYPE::LSTM I::6 SCORE::0.276
TYPE::GRU I::6 SCORE::0.0985
TYPE::RNN I::9 SCORE::0.2023
TYPE::LSTM I::9 SCORE::0.1042
TYPE::GRU I::9 SCORE::0.2088


Unnamed: 0,1,4,6,9
RNN,0.1037,0.3035,0.3244,0.2023
LSTM,0.4942,0.5015,0.276,0.1042
GRU,0.9996,0.9958,0.0985,0.2088


Влияние размерности скрытого состояния (hidden_size)

In [38]:
# Baseline settings for this experiment; proc overrides only the swept key.
conf = {
    'n_iters': 1000,
    'batch_size': 100,
    'seq_len': 100,
    'loss_best': 10**10,
    'learn_rate': 0.0015,
    'rnn_list': rnn_list,
    'n_classes': 10,
    'embedding_size': 10,
    'hidden_size': 50,
    'num_layers': 1,
    'dev': dev,
}

# Hidden-state widths to try.
list_ = [20, 50, 100, 125, 150, 200]

# Result table: one row per recurrent layer class, one column per swept value.
df = pd.DataFrame(
    index=[cell.__name__ for cell in rnn_list],
    columns=list_,
)

for res in proc(key_='hidden_size', list_=list_, df_=df, base_model=RNN, conf=conf):
    # Each yielded result names its own row (type_) and column (i_).
    df.loc[res['type_'], res['i_']] = res['score_']

df

TYPE::RNN I::20 SCORE::0.1015
TYPE::LSTM I::20 SCORE::0.1975
TYPE::GRU I::20 SCORE::0.1979
TYPE::RNN I::50 SCORE::0.1285
TYPE::LSTM I::50 SCORE::0.5267
TYPE::GRU I::50 SCORE::0.9992
TYPE::RNN I::100 SCORE::0.1011
TYPE::LSTM I::100 SCORE::0.5192
TYPE::GRU I::100 SCORE::1.0
TYPE::RNN I::125 SCORE::0.1203
TYPE::LSTM I::125 SCORE::1.0
TYPE::GRU I::125 SCORE::1.0
TYPE::RNN I::150 SCORE::0.1117
TYPE::LSTM I::150 SCORE::1.0
TYPE::GRU I::150 SCORE::1.0
TYPE::RNN I::200 SCORE::0.1167
TYPE::LSTM I::200 SCORE::0.7984
TYPE::GRU I::200 SCORE::1.0


Unnamed: 0,20,50,100,125,150,200
RNN,0.1015,0.1285,0.1011,0.1203,0.1117,0.1167
LSTM,0.1975,0.5267,0.5192,1.0,1.0,0.7984
GRU,0.1979,0.9992,1.0,1.0,1.0,1.0
