### Libraries

In [35]:
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import ParameterGrid
import matplotlib.pyplot as plt
from matplotlib import gridspec
from tqdm import tqdm
import numpy as np

from torch.utils.tensorboard import SummaryWriter
from torchvision.datasets import FashionMNIST
from torchvision import datasets, transforms
import torchvision
import torch

import warnings
warnings.filterwarnings("ignore")

In [40]:
# !unzip experiment.zip -d .
%load_ext tensorboard
%tensorboard --logdir ./experiment/

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 31418), started 4:53:23 ago. (Use '!kill 31418' to kill it.)

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

### Dataset & general parameters

Скачаем dataset для восстановления Part-of-Speech (POS) тегов для токенов в предложениях.

Общий алгоритм подготовки данных прост и понятен: извлекаем слова в предложениях (токены) и их теги, строим отображение токенов и тегов в индексы (т.е. преобразуем их в последовательности чисел); далее создайм кастомный dataset и добавляем возможность padding'а через проброс кастомного `collate_fn` в `DataLoader`. 

In [6]:
# Uncomment string below to download dataset
# !wget https://storage.yandexcloud.net/natasha-nerus/data/nerus_lenta.conllu.gz

In [7]:
from nerus import load_nerus
docs = load_nerus('nerus_lenta.conllu.gz')
special_tokens = ['<UNK>', '<PAD>']
special_tags = ['<PAD>']

sentences = []
tags = []

cnt = 0
n_docs_max = 5000

for doc in tqdm(docs):
    cnt += 1
    for sent in doc.sents:
        sent_ = []
        tag_ = []

        for word in sent.tokens:
            tag_.append(word.pos)
            sent_.append(word.text)

        sentences.append(sent_)
        tags.append(tag_)

    if cnt > n_docs_max:
        break

5000it [00:02, 1895.01it/s]


In [8]:
set_tokens = {word for sent in sentences for word in sent}
set_tokens.difference_update(special_tokens)
list_tokens = special_tokens + list(set_tokens)

set_tags = {tag for t in tags for tag in t}
set_tags.difference_update(special_tags)
list_tags = special_tags + list(set_tags)

token_to_idx = dict(zip(list_tokens, np.arange(len(list_tokens))))
tag_to_idx = dict(zip(list_tags, np.arange(len(list_tags))))

In [9]:
train_test_boundary = int(len(sentences) * 0.9)

train_sentences = sentences[:train_test_boundary]
train_tags = tags[:train_test_boundary]
test_sentences = sentences[train_test_boundary:]
test_tags = tags[train_test_boundary:]

print(len(train_sentences), len(test_sentences))

53112 5902


In [10]:
class TaggingDataset(torch.utils.data.Dataset):
    def __init__(self, sentences, tags, token_to_idx, tag_to_idx):
        super().__init__()

        self.sentences = sentences
        self.tags = tags
        self.token_to_idx = token_to_idx
        self.tag_to_idx = tag_to_idx

        sent_index = []
        tags_index = []

        for sent in sentences:
            sequence = []

            for token in sent:
                if token in self.token_to_idx:
                    sequence.append(token_to_idx[token])
                else:
                    sequence.append(0)

            sent_index.append(sequence)

        for sent_tags in tags:
            tgs = []

            for tag in sent_tags:
                tgs.append(tag_to_idx[tag])

            tags_index.append(tgs)

        self.sent_index = sent_index
        self.tags_index = tags_index

    def __getitem__(self, idx):
        return torch.tensor(self.sent_index[idx]), torch.tensor(self.tags_index[idx])

    def __len__(self):
        return len(self.sent_index)

In [11]:
train_dataset = TaggingDataset(train_sentences, train_tags, token_to_idx, tag_to_idx)
test_dataset = TaggingDataset(test_sentences, test_tags, token_to_idx, tag_to_idx)

In [12]:
class PaddingCollator:
    def __init__(self, pad_token_id, pad_tag_id):
        self.pad_token_idx = pad_token_id
        self.pad_tag_id = pad_tag_id

    def __call__(self, batch):

        max_len = 0
        for elem in batch:
            if (len(elem[0]) > max_len):
                max_len = len(elem[0])

        new_sentences = []
        new_tags = []

        for elem in batch:
            new_sentences.append(torch.nn.functional.pad(elem[0], (0, max_len - len(elem[0])), "constant", self.pad_token_idx))
            new_tags.append(torch.nn.functional.pad(elem[1], (0, max_len - len(elem[1])), "constant", self.pad_tag_id))

        return torch.stack(new_sentences), torch.stack(new_tags)

In [13]:
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=False,
    drop_last=False,
    collate_fn=PaddingCollator(
        pad_token_id=token_to_idx['<PAD>'],
        pad_tag_id=tag_to_idx['<PAD>'],
    )
)

test_dataloader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    drop_last=False,
    collate_fn=PaddingCollator(
        pad_token_id=token_to_idx['<PAD>'],
        pad_tag_id=tag_to_idx['<PAD>'],
    )
)

### General training code

Код для обучения, формат которого взят с семинаров.

In [14]:
def train_on_batch(model, x_batch, y_batch, optimizer, loss_function):
    model.train()
    model.zero_grad()

    x_batch = x_batch.to(device)
    y_batch = y_batch.to(device)

    output = model(x_batch)
    output = torch.transpose(output, 1, 2)
    loss = loss_function(output, y_batch)

    loss.backward()
    optimizer.step()

    return loss.cpu().item()

In [15]:
def train_epoch(train_generator, model, loss_function, optimizer, callback):
    epoch_loss = 0
    total = 0

    for it, (batch_of_x, batch_of_y) in enumerate(train_generator):
        batch_loss = train_on_batch(
            model, batch_of_x.to(device), batch_of_y.to(device), optimizer, loss_function)

        if callback is not None:
            callback(model, batch_loss)

        epoch_loss += batch_loss * len(batch_of_x)
        total += len(batch_of_x)

    return epoch_loss / total

In [16]:
def trainer(count_of_epoch,
            model,
            dataset_loader,
            loss_function,
            optimizer,
            lr=0.001,
            callback=None):
    optima = optimizer(model.parameters(), lr=lr, weight_decay=1e-5)

    iterations = tqdm(range(count_of_epoch))

    for it in iterations:
        epoch_loss = train_epoch(
            train_generator=dataset_loader, model=model,
            loss_function=loss_function,
            optimizer=optima,
            callback=callback)

        iterations.set_postfix({'train epoch loss': epoch_loss})

In [24]:
def quality_of_train(dataset_loader,
                     model,
                     loss_function):
    pred = []
    real = []
    test_loss = 0
    total = 0

    for it, (sentences, tags) in enumerate(dataset_loader):
        sentences = sentences.to(device)
        tags = tags.to(device)

        output = model(sentences)

        pred.extend(torch.argmax(output, dim=2).cpu().numpy().flatten().tolist())
        real.extend(tags.cpu().numpy().flatten().tolist())

        output = torch.transpose(output, 1, 2)
        test_loss += loss_function(output, tags).cpu().item() * len(sentences)
        total += len(sentences)

    test_loss /= total

    return test_loss, pred, real

### LSTM model

In [18]:
class LSTMTagger(torch.nn.Module):
    @property
    def device(self):
        return next(self.parameters()).device

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size, dropout=0.0):
        super(LSTMTagger, self).__init__()

        self.word_embeddings = torch.nn.Embedding(vocab_size, embedding_dim)
        self.lstm = torch.nn.LSTM(embedding_dim, hidden_dim, batch_first=True, dropout=dropout)
        self.linear = torch.nn.Linear(hidden_dim, tagset_size)

    def forward(self, x_batch):
        embeddings = self.word_embeddings(x_batch)

        d_n, (h_n, c_n) = self.lstm(embeddings)
        return self.linear(d_n)

### Tensorboard training tracking

In [38]:
class callback():
    def __init__(self, writer, dataset_loader, loss_function, delimeter=100):
        self.step = 0
        self.writer = writer
        self.delimeter = delimeter
        self.loss_function = loss_function

        self.dataset_loader = dataset_loader

    def forward(self, model, loss):
        self.step += 1
        self.writer.add_scalar('LOSS/train', loss, self.step)
        model.eval()

        if self.step % self.delimeter == 0:
            test_loss, pred, real = quality_of_train(dataset_loader=self.dataset_loader,
                                                     model=model, loss_function=self.loss_function)
            self.writer.add_scalar('LOSS/test', test_loss, self.step)

            indices = (real != tag_to_idx['<PAD>']).nonzero()

            real = np.array(real)[indices]
            pred = np.array(pred)[indices]

            self.writer.add_scalar('Valid/acc', accuracy_score(real, pred), self.step)
            self.writer.add_text('Valid/report', str(classification_report(real, pred)), self.step)

    def __call__(self, model, loss):
        return self.forward(model, loss)

### LSTM training

In [20]:
loss_function = torch.nn.CrossEntropyLoss(ignore_index=tag_to_idx['<PAD>'])
optimizer = torch.optim.Adam

In [39]:
grid = ParameterGrid({
    'embedding_dim': [150, 290, 430, 600],
    'hidden_dim': [150, 290, 430, 600],
    'dropout': [0.0, 0.18, 0.36, 0.54],
})

scores = dict()

for item in tqdm(grid):
    print(str(item))

    model = LSTMTagger(
        embedding_dim=item['embedding_dim'],
        hidden_dim=item['hidden_dim'],
        vocab_size=len(token_to_idx),
        tagset_size=len(tag_to_idx),
        dropout=item['dropout']
    )

    writer = SummaryWriter('experiment/' + str(item))

    model.float().to(device)

    call = callback(writer, test_dataloader, loss_function, delimeter=10)

    trainer(count_of_epoch=2,
        dataset_loader=train_dataloader,
        model=model,
        loss_function=loss_function,
        optimizer=optimizer,
        lr=0.001,
        callback=call)

  0%|          | 0/64 [00:00<?, ?it/s]

{'dropout': 0.0, 'embedding_dim': 150, 'hidden_dim': 150}


100%|██████████| 2/2 [01:09<00:00, 34.54s/it, train epoch loss=0.338]
  2%|▏         | 1/64 [01:09<1:12:39, 69.19s/it]

{'dropout': 0.0, 'embedding_dim': 150, 'hidden_dim': 290}


100%|██████████| 2/2 [01:18<00:00, 39.46s/it, train epoch loss=0.328]
  3%|▎         | 2/64 [02:28<1:17:27, 74.96s/it]

{'dropout': 0.0, 'embedding_dim': 150, 'hidden_dim': 430}


100%|██████████| 2/2 [01:20<00:00, 40.48s/it, train epoch loss=0.324]
  5%|▍         | 3/64 [03:49<1:19:01, 77.74s/it]

{'dropout': 0.0, 'embedding_dim': 150, 'hidden_dim': 600}


100%|██████████| 2/2 [01:18<00:00, 39.25s/it, train epoch loss=0.319]
  6%|▋         | 4/64 [05:07<1:18:03, 78.07s/it]

{'dropout': 0.0, 'embedding_dim': 290, 'hidden_dim': 150}


100%|██████████| 2/2 [01:14<00:00, 37.46s/it, train epoch loss=0.298]
  8%|▊         | 5/64 [06:22<1:15:41, 76.98s/it]

{'dropout': 0.0, 'embedding_dim': 290, 'hidden_dim': 290}


100%|██████████| 2/2 [01:28<00:00, 44.46s/it, train epoch loss=0.287]
  9%|▉         | 6/64 [07:51<1:18:22, 81.08s/it]

{'dropout': 0.0, 'embedding_dim': 290, 'hidden_dim': 430}


100%|██████████| 2/2 [01:30<00:00, 45.37s/it, train epoch loss=0.282]
 11%|█         | 7/64 [09:22<1:20:04, 84.29s/it]

{'dropout': 0.0, 'embedding_dim': 290, 'hidden_dim': 600}


100%|██████████| 2/2 [01:28<00:00, 44.34s/it, train epoch loss=0.278]
 12%|█▎        | 8/64 [10:51<1:20:00, 85.73s/it]

{'dropout': 0.0, 'embedding_dim': 430, 'hidden_dim': 150}


100%|██████████| 2/2 [01:23<00:00, 41.80s/it, train epoch loss=0.276]
 14%|█▍        | 9/64 [12:15<1:18:03, 85.16s/it]

{'dropout': 0.0, 'embedding_dim': 430, 'hidden_dim': 290}


100%|██████████| 2/2 [01:34<00:00, 47.28s/it, train epoch loss=0.263]
 16%|█▌        | 10/64 [13:50<1:19:18, 88.12s/it]

{'dropout': 0.0, 'embedding_dim': 430, 'hidden_dim': 430}


100%|██████████| 2/2 [01:37<00:00, 48.70s/it, train epoch loss=0.259]
 17%|█▋        | 11/64 [15:27<1:20:24, 91.02s/it]

{'dropout': 0.0, 'embedding_dim': 430, 'hidden_dim': 600}


100%|██████████| 2/2 [01:35<00:00, 47.85s/it, train epoch loss=0.254]
 19%|█▉        | 12/64 [17:03<1:20:10, 92.51s/it]

{'dropout': 0.0, 'embedding_dim': 600, 'hidden_dim': 150}


100%|██████████| 2/2 [01:31<00:00, 45.51s/it, train epoch loss=0.254]
 20%|██        | 13/64 [18:35<1:18:20, 92.17s/it]

{'dropout': 0.0, 'embedding_dim': 600, 'hidden_dim': 290}


100%|██████████| 2/2 [01:41<00:00, 50.69s/it, train epoch loss=0.244]
 22%|██▏       | 14/64 [20:16<1:19:11, 95.04s/it]

{'dropout': 0.0, 'embedding_dim': 600, 'hidden_dim': 430}


100%|██████████| 2/2 [01:43<00:00, 51.53s/it, train epoch loss=0.238]
 23%|██▎       | 15/64 [22:00<1:19:39, 97.54s/it]

{'dropout': 0.0, 'embedding_dim': 600, 'hidden_dim': 600}


100%|██████████| 2/2 [01:40<00:00, 50.21s/it, train epoch loss=0.231]
 25%|██▌       | 16/64 [23:40<1:18:47, 98.49s/it]

{'dropout': 0.18, 'embedding_dim': 150, 'hidden_dim': 150}


100%|██████████| 2/2 [01:08<00:00, 34.22s/it, train epoch loss=0.338]
 27%|██▋       | 17/64 [24:49<1:10:05, 89.48s/it]

{'dropout': 0.18, 'embedding_dim': 150, 'hidden_dim': 290}


100%|██████████| 2/2 [01:19<00:00, 39.52s/it, train epoch loss=0.331]
 28%|██▊       | 18/64 [26:08<1:06:12, 86.37s/it]

{'dropout': 0.18, 'embedding_dim': 150, 'hidden_dim': 430}


100%|██████████| 2/2 [01:20<00:00, 40.34s/it, train epoch loss=0.326]
 30%|██▉       | 19/64 [27:29<1:03:30, 84.68s/it]

{'dropout': 0.18, 'embedding_dim': 150, 'hidden_dim': 600}


100%|██████████| 2/2 [01:17<00:00, 38.76s/it, train epoch loss=0.32]
 31%|███▏      | 20/64 [28:46<1:00:32, 82.55s/it]

{'dropout': 0.18, 'embedding_dim': 290, 'hidden_dim': 150}


100%|██████████| 2/2 [01:14<00:00, 37.40s/it, train epoch loss=0.297]
 33%|███▎      | 21/64 [30:01<57:31, 80.27s/it]  

{'dropout': 0.18, 'embedding_dim': 290, 'hidden_dim': 290}


100%|██████████| 2/2 [01:25<00:00, 42.91s/it, train epoch loss=0.286]
 34%|███▍      | 22/64 [31:27<57:22, 81.97s/it]

{'dropout': 0.18, 'embedding_dim': 290, 'hidden_dim': 430}


100%|██████████| 2/2 [01:27<00:00, 43.97s/it, train epoch loss=0.283]
 36%|███▌      | 23/64 [32:55<57:16, 83.81s/it]

{'dropout': 0.18, 'embedding_dim': 290, 'hidden_dim': 600}


100%|██████████| 2/2 [01:24<00:00, 42.36s/it, train epoch loss=0.278]
 38%|███▊      | 24/64 [34:20<56:05, 84.13s/it]

{'dropout': 0.18, 'embedding_dim': 430, 'hidden_dim': 150}


100%|██████████| 2/2 [01:20<00:00, 40.10s/it, train epoch loss=0.273]
 39%|███▉      | 25/64 [35:41<53:57, 83.01s/it]

{'dropout': 0.18, 'embedding_dim': 430, 'hidden_dim': 290}


100%|██████████| 2/2 [01:32<00:00, 46.09s/it, train epoch loss=0.264]
 41%|████      | 26/64 [37:13<54:21, 85.82s/it]

{'dropout': 0.18, 'embedding_dim': 430, 'hidden_dim': 430}


100%|██████████| 2/2 [01:35<00:00, 47.90s/it, train epoch loss=0.258]
 42%|████▏     | 27/64 [38:49<54:48, 88.87s/it]

{'dropout': 0.18, 'embedding_dim': 430, 'hidden_dim': 600}


100%|██████████| 2/2 [01:34<00:00, 47.28s/it, train epoch loss=0.254]
 44%|████▍     | 28/64 [40:24<54:23, 90.64s/it]

{'dropout': 0.18, 'embedding_dim': 600, 'hidden_dim': 150}


100%|██████████| 2/2 [01:29<00:00, 44.63s/it, train epoch loss=0.255]
 45%|████▌     | 29/64 [41:53<52:41, 90.34s/it]

{'dropout': 0.18, 'embedding_dim': 600, 'hidden_dim': 290}


100%|██████████| 2/2 [01:40<00:00, 50.29s/it, train epoch loss=0.244]
 47%|████▋     | 30/64 [43:34<52:59, 93.50s/it]

{'dropout': 0.18, 'embedding_dim': 600, 'hidden_dim': 430}


100%|██████████| 2/2 [01:43<00:00, 51.64s/it, train epoch loss=0.236]
 48%|████▊     | 31/64 [45:18<53:05, 96.53s/it]

{'dropout': 0.18, 'embedding_dim': 600, 'hidden_dim': 600}


100%|██████████| 2/2 [01:42<00:00, 51.34s/it, train epoch loss=0.231]
 50%|█████     | 32/64 [47:01<52:30, 98.46s/it]

{'dropout': 0.36, 'embedding_dim': 150, 'hidden_dim': 150}


100%|██████████| 2/2 [01:12<00:00, 36.07s/it, train epoch loss=0.336]
 52%|█████▏    | 33/64 [48:13<46:48, 90.59s/it]

{'dropout': 0.36, 'embedding_dim': 150, 'hidden_dim': 290}


100%|██████████| 2/2 [01:21<00:00, 40.84s/it, train epoch loss=0.327]
 53%|█████▎    | 34/64 [49:35<43:58, 87.94s/it]

{'dropout': 0.36, 'embedding_dim': 150, 'hidden_dim': 430}


100%|██████████| 2/2 [01:23<00:00, 41.57s/it, train epoch loss=0.324]
 55%|█████▍    | 35/64 [50:58<41:49, 86.52s/it]

{'dropout': 0.36, 'embedding_dim': 150, 'hidden_dim': 600}


100%|██████████| 2/2 [01:20<00:00, 40.10s/it, train epoch loss=0.321]
 56%|█████▋    | 36/64 [52:18<39:30, 84.65s/it]

{'dropout': 0.36, 'embedding_dim': 290, 'hidden_dim': 150}


100%|██████████| 2/2 [01:16<00:00, 38.38s/it, train epoch loss=0.298]
 58%|█████▊    | 37/64 [53:35<37:02, 82.32s/it]

{'dropout': 0.36, 'embedding_dim': 290, 'hidden_dim': 290}


100%|██████████| 2/2 [01:28<00:00, 44.15s/it, train epoch loss=0.288]
 59%|█████▉    | 38/64 [55:04<36:28, 84.16s/it]

{'dropout': 0.36, 'embedding_dim': 290, 'hidden_dim': 430}


100%|██████████| 2/2 [01:28<00:00, 44.05s/it, train epoch loss=0.282]
 61%|██████    | 39/64 [56:32<35:34, 85.39s/it]

{'dropout': 0.36, 'embedding_dim': 290, 'hidden_dim': 600}


100%|██████████| 2/2 [01:25<00:00, 42.89s/it, train epoch loss=0.277]
 62%|██████▎   | 40/64 [57:58<34:13, 85.55s/it]

{'dropout': 0.36, 'embedding_dim': 430, 'hidden_dim': 150}


100%|██████████| 2/2 [01:20<00:00, 40.12s/it, train epoch loss=0.275]
 64%|██████▍   | 41/64 [59:18<32:12, 84.04s/it]

{'dropout': 0.36, 'embedding_dim': 430, 'hidden_dim': 290}


100%|██████████| 2/2 [01:32<00:00, 46.42s/it, train epoch loss=0.264]
 66%|██████▌   | 42/64 [1:00:51<31:48, 86.74s/it]

{'dropout': 0.36, 'embedding_dim': 430, 'hidden_dim': 430}


100%|██████████| 2/2 [01:33<00:00, 46.94s/it, train epoch loss=0.258]
 67%|██████▋   | 43/64 [1:02:26<31:08, 88.96s/it]

{'dropout': 0.36, 'embedding_dim': 430, 'hidden_dim': 600}


100%|██████████| 2/2 [01:32<00:00, 46.11s/it, train epoch loss=0.252]
 69%|██████▉   | 44/64 [1:03:58<29:59, 90.00s/it]

{'dropout': 0.36, 'embedding_dim': 600, 'hidden_dim': 150}


100%|██████████| 2/2 [01:26<00:00, 43.13s/it, train epoch loss=0.254]
 70%|███████   | 45/64 [1:05:25<28:10, 89.00s/it]

{'dropout': 0.36, 'embedding_dim': 600, 'hidden_dim': 290}


100%|██████████| 2/2 [01:38<00:00, 49.47s/it, train epoch loss=0.243]
 72%|███████▏  | 46/64 [1:07:04<27:37, 92.07s/it]

{'dropout': 0.36, 'embedding_dim': 600, 'hidden_dim': 430}


100%|██████████| 2/2 [01:41<00:00, 50.60s/it, train epoch loss=0.238]
 73%|███████▎  | 47/64 [1:08:45<26:53, 94.93s/it]

{'dropout': 0.36, 'embedding_dim': 600, 'hidden_dim': 600}


100%|██████████| 2/2 [01:39<00:00, 49.98s/it, train epoch loss=0.231]
 75%|███████▌  | 48/64 [1:10:26<25:44, 96.54s/it]

{'dropout': 0.54, 'embedding_dim': 150, 'hidden_dim': 150}


100%|██████████| 2/2 [01:08<00:00, 34.43s/it, train epoch loss=0.339]
 77%|███████▋  | 49/64 [1:11:35<22:03, 88.26s/it]

{'dropout': 0.54, 'embedding_dim': 150, 'hidden_dim': 290}


100%|██████████| 2/2 [01:21<00:00, 40.53s/it, train epoch loss=0.329]
 78%|███████▊  | 50/64 [1:12:56<20:05, 86.12s/it]

{'dropout': 0.54, 'embedding_dim': 150, 'hidden_dim': 430}


100%|██████████| 2/2 [01:21<00:00, 40.76s/it, train epoch loss=0.323]
 80%|███████▉  | 51/64 [1:14:17<18:21, 84.77s/it]

{'dropout': 0.54, 'embedding_dim': 150, 'hidden_dim': 600}


100%|██████████| 2/2 [01:18<00:00, 39.32s/it, train epoch loss=0.32]
 81%|████████▏ | 52/64 [1:15:36<16:35, 82.96s/it]

{'dropout': 0.54, 'embedding_dim': 290, 'hidden_dim': 150}


100%|██████████| 2/2 [01:13<00:00, 36.69s/it, train epoch loss=0.297]
 83%|████████▎ | 53/64 [1:16:50<14:41, 80.13s/it]

{'dropout': 0.54, 'embedding_dim': 290, 'hidden_dim': 290}


100%|██████████| 2/2 [01:27<00:00, 43.99s/it, train epoch loss=0.288]
 84%|████████▍ | 54/64 [1:18:18<13:45, 82.53s/it]

{'dropout': 0.54, 'embedding_dim': 290, 'hidden_dim': 430}


100%|██████████| 2/2 [01:28<00:00, 44.02s/it, train epoch loss=0.283]
 86%|████████▌ | 55/64 [1:19:46<12:38, 84.22s/it]

{'dropout': 0.54, 'embedding_dim': 290, 'hidden_dim': 600}


100%|██████████| 2/2 [01:25<00:00, 42.57s/it, train epoch loss=0.278]
 88%|████████▊ | 56/64 [1:21:11<11:16, 84.54s/it]

{'dropout': 0.54, 'embedding_dim': 430, 'hidden_dim': 150}


100%|██████████| 2/2 [01:19<00:00, 39.76s/it, train epoch loss=0.274]
 89%|████████▉ | 57/64 [1:22:31<09:41, 83.11s/it]

{'dropout': 0.54, 'embedding_dim': 430, 'hidden_dim': 290}


100%|██████████| 2/2 [01:32<00:00, 46.38s/it, train epoch loss=0.264]
 91%|█████████ | 58/64 [1:24:04<08:36, 86.08s/it]

{'dropout': 0.54, 'embedding_dim': 430, 'hidden_dim': 430}


100%|██████████| 2/2 [01:34<00:00, 47.34s/it, train epoch loss=0.259]
 92%|█████████▏| 59/64 [1:25:39<07:23, 88.72s/it]

{'dropout': 0.54, 'embedding_dim': 430, 'hidden_dim': 600}


100%|██████████| 2/2 [01:32<00:00, 46.07s/it, train epoch loss=0.253]
 94%|█████████▍| 60/64 [1:27:11<05:59, 89.82s/it]

{'dropout': 0.54, 'embedding_dim': 600, 'hidden_dim': 150}


100%|██████████| 2/2 [01:26<00:00, 43.08s/it, train epoch loss=0.254]
 95%|█████████▌| 61/64 [1:28:38<04:26, 88.85s/it]

{'dropout': 0.54, 'embedding_dim': 600, 'hidden_dim': 290}


100%|██████████| 2/2 [01:40<00:00, 50.07s/it, train epoch loss=0.243]
 97%|█████████▋| 62/64 [1:30:18<03:04, 92.34s/it]

{'dropout': 0.54, 'embedding_dim': 600, 'hidden_dim': 430}


100%|██████████| 2/2 [01:41<00:00, 50.70s/it, train epoch loss=0.236]
 98%|█████████▊| 63/64 [1:32:00<01:35, 95.16s/it]

{'dropout': 0.54, 'embedding_dim': 600, 'hidden_dim': 600}


100%|██████████| 2/2 [01:40<00:00, 50.10s/it, train epoch loss=0.229]
100%|██████████| 64/64 [1:33:41<00:00, 87.83s/it]


### Conclusions

Результаты получились следующие:
* Dropout не сильно влияет на точность модели, что более-менее ожидаемо.
* Чем больше размерность embedding слоя, тем больше точность (не особо сильно), что ожидаемо.
* Чем больше размерность hidden слоя, тем больше точность (не особо сильно), что ожидаемо.
* Наибольший score набрала модель с максимальной размерностью embedding слоя и hidden слоя, что подтверждает наблюдения выше.
* В целом модель хорошо справилась со своей задачей: accuracy модели превышает 0.9 на определённых наборах параметров. 
