1. Сравнить LSTM, RNN и GRU на задаче предсказания части речи (качество предсказания, скорость обучения, время инференса модели)

2. \* к первой задаче добавить bidirectional

In [1]:
# Imports
import torch
from torch import optim
from torch import nn
from torch.utils.data import DataLoader, Dataset
import time


# Model pattern
class RNN_model(nn.Module):
    def __init__(self, 
                 word_vocab_len: int, 
                 n_classes: int, 
                 rnn: object, 
                 bidirect: bool,
                 emb_size: int = 128, 
                 hidden_size: int = 128):
        super().__init__()
        self.word_emb = nn.Embedding(word_vocab_len, emb_size)
        self.rnn = rnn(input_size=emb_size, hidden_size=hidden_size, batch_first=True, bidirectional=bidirect)
        self.classifier = nn.Linear(hidden_size*(2 if bidirect else 1), n_classes)

    def forward(self, x):
        embedded = self.word_emb(x)
        out, _ = self.rnn(embedded)
        return self.classifier(out)
    

# Dataset
class DatasetSeq(Dataset):
    def __init__(self, train_lang='en'):
        with open(train_lang + '.train', 'r', encoding='utf-8') as f:
            train = f.read().split('\n\n')
        # delete extra tag markup
        train = [x for x in train if not '_ ' in x]
        self.target_vocab = {}
        self.word_vocab = {}
        self.encoded_sequences = []
        self.encoded_targets = []
        n_word = 0
        n_target = 0
        
        for line in train:
            sequence = []
            target = []
            for item in line.split('\n'):
                if item != '':
                    word, label = item.split(' ')
                    if self.word_vocab.get(word) is None:
                        self.word_vocab[word] = n_word
                        n_word += 1
                    if self.target_vocab.get(label) is None:
                        self.target_vocab[label] = n_target
                        n_target += 1
                    sequence.append(self.word_vocab[word])
                    target.append(self.target_vocab[label])
                
            self.encoded_sequences.append(sequence)
            self.encoded_targets.append(target)

    def __len__(self):
        return len(self.encoded_sequences)
    
    def __getitem__(self, index):
        return {
            'data': torch.tensor(self.encoded_sequences[index]),
            'target': torch.tensor(self.encoded_targets[index]),
        }
dataset = DatasetSeq()


# Hyperparameters
learning_rate = 0.001
batch_size = 32
n_epochs = 3
vocab_len = len(dataset.word_vocab)
n_classes = len(dataset.target_vocab)


# Dataloader and collate_fn
def collate_fn(data):
    return data[0]

dataloader = DataLoader(
                dataset=dataset, 
                collate_fn=collate_fn, 
                batch_size=batch_size, 
                shuffle=True, 
                drop_last=True)


# Train
models = []
RNNs = [nn.RNN, nn.GRU, nn.LSTM]
bidirect_params = [True, False]
info_for_show = {}

for rnn in RNNs:
    info_for_show[rnn.__name__] = {}
    
    for bidirect in bidirect_params:
        info_bidirect = 'bidirect' if bidirect else 'without_bidirect'
        info_for_show[rnn.__name__][info_bidirect] = {}
        
        # Model
        model = RNN_model(vocab_len, n_classes, rnn, bidirect)
                
        # Loss and optimizer
        criterion = nn.CrossEntropyLoss()
        optimiser = optim.Adam(model.parameters(), lr=learning_rate)
        
        # start_timer
        start_train_time = time.time()
        
        # Train loop
        for epoch in range(n_epochs):
            
            for step, batch in enumerate(dataloader):
                data = batch['data'].unsqueeze(0)
                target = batch['target']
                predict = model(data)
                predict = predict.view(-1, n_classes)
                loss = criterion(predict, target)
                optimiser.zero_grad()
                loss.backward()
                optimiser.step()
        
        info_for_show[rnn.__name__][info_bidirect]['train_time'] = time.time()-start_train_time
        
        model_dict = {'name':rnn.__name__, 'bidirect':bidirect, 'model':model}
        models.append(model_dict)
        
        
# Test
for model_dict in models:
    info_bidirect = 'bidirect' if model_dict['bidirect'] else 'without_bidirect'
    model_name = model_dict["name"]
    
    model = model_dict['model']
    inferences_times = []
    
    def check_accuracy(loader, model):
        num_correct = 0
        num_samples = 0
        model.eval()
        with torch.no_grad():
            
            for step, batch in enumerate(loader):
                x = batch['data'].unsqueeze(0)
                y = batch['target']
                start_inference_time = time.time()
                scores = model(x).view(-1, n_classes)
                inference_time = time.time() - start_inference_time
                inferences_times.append(inference_time)
                _, predictions = scores.max(1)
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
        model.train()
        return num_correct / num_samples
    
    accuracy = check_accuracy(dataloader, model)
    mean_inference_time = sum(inferences_times)/len(inferences_times)
    
    info_for_show[model_name][info_bidirect]['accuracy'] = accuracy
    info_for_show[model_name][info_bidirect]['inference_time'] = mean_inference_time

In [2]:
for model_name in info_for_show:
    for bidirect_or_not in info_for_show[model_name]:
        print(model_name, bidirect_or_not)
        info = info_for_show[model_name][bidirect_or_not]
        print('train time = ', f"{info['train_time']:.2f} sec")
        print('accuracy = ', f"{info['accuracy']*100:.2f} %")
        print('inference time = ', f"{info['inference_time']*1000:.2f} ms")
        print('')

RNN bidirect
train time =  81.84 sec
accuracy =  73.65 %
inference time =  1.20 ms

RNN without_bidirect
train time =  87.87 sec
accuracy =  71.41 %
inference time =  0.67 ms

GRU bidirect
train time =  111.02 sec
accuracy =  77.77 %
inference time =  3.02 ms

GRU without_bidirect
train time =  92.23 sec
accuracy =  74.79 %
inference time =  1.65 ms

LSTM bidirect
train time =  113.78 sec
accuracy =  78.01 %
inference time =  3.13 ms

LSTM without_bidirect
train time =  101.96 sec
accuracy =  74.87 %
inference time =  1.61 ms

