<a href="https://colab.research.google.com/github/Vitaly-lv/DS2022/blob/main/HW_4_NN_LeontevVV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Сравнить LSTM, RNN и GRU на задаче предсказания части речи (качество предсказания, скорость обучения, время инференса модели)

In [None]:
import datetime
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

In [None]:
# Mount Google Drive at /content/drive so files stored there can be opened
# from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Directory prefix of the training data (relative path, trailing slash
# included because DatasetSeq builds the file name by string concatenation).
data_dir = 'drive/My Drive/'
# Language code; DatasetSeq reads the file named '<lang>.train'.
train_lang = 'en'

In [None]:
class DatasetSeq(Dataset):
    """POS-tagging dataset that stores sentences as lists of integer ids.

    The data is read from ``data_dir + train_lang + '.train'``. Sentences are
    separated by a blank line, and every non-empty line inside a sentence has
    the form ``"<word> <label>"`` (exactly one space).

    Attributes:
        word_vocab, target_vocab, char_vocab: ``{token: id}`` mappings; id 0
            is reserved for ``'<pad>'`` and new tokens get consecutive ids in
            order of first appearance.
        encoded_sequences: per sentence, the list of word ids.
        encoded_targets: per sentence, the list of label ids.
        encoded_char_sequences: per sentence, a list of char-id lists
            (one inner list per word).

    Raises:
        ValueError: if a non-empty line does not split into exactly two
            space-separated fields.
    """

    def __init__(self, data_dir, train_lang='en'):
        # Explicit encoding so decoding does not depend on the platform locale.
        with open(data_dir + train_lang + '.train', 'r', encoding='utf-8') as f:
            train = f.read().split('\n\n')

        # Drop sentences that contain extra tag markup ('_ ').
        train = [x for x in train if '_ ' not in x]

        # Vocabularies for encoding {<str> token: <int> id}
        self.target_vocab = {'<pad>': 0}  # {p: 1, a: 2, r: 3, pu: 4}
        self.word_vocab = {'<pad>': 0}  # {cat: 1, sat: 2, on: 3, mat: 4, '.': 5}
        self.char_vocab = {'<pad>': 0}  # {c: 1, a: 2, t: 3, ' ': 4, s: 5}

        self.encoded_sequences = []
        self.encoded_targets = []
        self.encoded_char_sequences = []

        for line in train:
            sequence = []
            target = []
            chars = []
            for item in line.split('\n'):
                if item == '':
                    continue
                word, label = item.split(' ')
                # setdefault assigns the next free id (the current vocab size)
                # the first time a token is seen and returns the id either way.
                sequence.append(
                    self.word_vocab.setdefault(word, len(self.word_vocab)))
                target.append(
                    self.target_vocab.setdefault(label, len(self.target_vocab)))
                chars.append(
                    [self.char_vocab.setdefault(char, len(self.char_vocab))
                     for char in word])

            # A trailing blank chunk (file ending in an empty line) yields no
            # tokens; skip it instead of storing an empty training sample.
            if not sequence:
                continue

            self.encoded_sequences.append(sequence)
            self.encoded_targets.append(target)
            self.encoded_char_sequences.append(chars)

    def __len__(self):
        """Return the number of stored sentences."""
        return len(self.encoded_sequences)

    def __getitem__(self, index):
        """Return the encoded sentence at ``index`` as a dict of id lists."""
        return {
            'data': self.encoded_sequences[index],  # [1, 2, 3, 4, 6] len=5
            'char': self.encoded_char_sequences[index],  # [[1,2,3], [4,5], [1,2], [2,6,5,4], []] len=5
            'target': self.encoded_targets[index],  # [1, 2, 3, 4, 6] len=5
        }

In [None]:
# Pass the configured language explicitly so that changing train_lang
# actually changes which '<lang>.train' file is loaded.
dataset = DatasetSeq(data_dir, train_lang)

In [None]:
def collate_fn(batch):
    """Pad a list of dataset items into rectangular index tensors.

    Args:
        batch: list of dicts with 'data' and 'target' lists of int ids.

    Returns:
        Dict with 'data' and 'target' tensors of shape
        (batch, longest sequence in the batch), right-padded with 0.
    """
    # dtype is forced to long: torch.as_tensor([]) would otherwise produce a
    # float tensor, and an empty first item would make the whole batch float.
    data = [torch.as_tensor(item['data'], dtype=torch.long) for item in batch]
    target = [torch.as_tensor(item['target'], dtype=torch.long) for item in batch]

    return {
        'data': pad_sequence(data, batch_first=True, padding_value=0),
        'target': pad_sequence(target, batch_first=True, padding_value=0),
    }

In [None]:
# hyper params
# The vocab dicts already contain '<pad>' at id 0, so the "+ 1" leaves one
# spare, never-assigned index at the end of each table.
vocab_size = len(dataset.word_vocab) + 1
n_classes = len(dataset.target_vocab) + 1
n_chars = len(dataset.char_vocab) + 1
emb_dim = 256
hidden = 256
n_epochs = 10
batch_size = 100
cuda_device = 0  # set to -1 to force CPU
# Fall back to CPU when no CUDA device is available instead of failing on
# the first .to(device) call.
device = (f'cuda:{cuda_device}'
          if cuda_device != -1 and torch.cuda.is_available()
          else 'cpu')

LSTM

In [None]:
class LSTM(nn.Module):
    """Token tagger: word embedding -> single-layer LSTM -> linear classifier.

    Args:
        vocab_size: number of rows in the word-embedding table.
        emb_dim: size of each word embedding.
        hidden_dim: size of the LSTM hidden state.
        n_classes: number of output scores per token.
    """

    def __init__(self, vocab_size, emb_dim, hidden_dim, n_classes):
        super().__init__()
        self.word_emb = nn.Embedding(num_embeddings=vocab_size,
                                     embedding_dim=emb_dim)
        self.rnn = nn.LSTM(input_size=emb_dim,
                           hidden_size=hidden_dim,
                           batch_first=True)
        self.clf = nn.Linear(in_features=hidden_dim, out_features=n_classes)
        self.do = nn.Dropout(p=0.1)

    def forward(self, x):
        """Return per-token class scores (B x T x N_classes) for word ids x."""
        embedded = self.word_emb(x)  # B x T x Emb_dim
        # Only the per-step outputs are used; the final (h_n, c_n) state is dropped.
        outputs, _final_state = self.rnn(embedded)  # outputs: B x T x Hid
        regularised = self.do(outputs)
        return self.clf(regularised)  # B x T x N_classes

In [None]:
model = LSTM(vocab_size, emb_dim, hidden, n_classes).to(device)
model.train()
optim = torch.optim.Adam(model.parameters(), lr=0.001)
# Target id 0 is the '<pad>' label that collate_fn uses to pad batches;
# ignoring it keeps padded positions out of the averaged loss.
loss_func = nn.CrossEntropyLoss(ignore_index=0)

In [None]:
#  Обучение LSTM
%%time
for epoch in range(n_epochs):
    dataloader = DataLoader(dataset, 
                            batch_size, 
                            shuffle=True, 
                            collate_fn=collate_fn,
                            drop_last = True,
                            )
    
    for i, batch in enumerate(dataloader):
        optim.zero_grad()
        predict = model(batch['data'].to(device))
        loss = loss_func(predict.view(-1, n_classes),
                         batch['target'].to(device).view(-1), 
                         )
        loss.backward()
        optim.step()
        if i % 100 == 0:
            print(f'epoch: {epoch}, step: {i}, loss: {loss.item()}')
   
    torch.save(model.state_dict(), f'./rnn_chkpt_{epoch}.pth')
print(loss.item())

epoch: 0, step: 0, loss: 2.877631664276123
epoch: 0, step: 100, loss: 0.2676323652267456
epoch: 0, step: 200, loss: 0.13285693526268005
epoch: 1, step: 0, loss: 0.18667706847190857
epoch: 1, step: 100, loss: 0.24471642076969147
epoch: 1, step: 200, loss: 0.17065216600894928
epoch: 2, step: 0, loss: 0.16327928006649017
epoch: 2, step: 100, loss: 0.13714388012886047
epoch: 2, step: 200, loss: 0.06251411885023117
epoch: 3, step: 0, loss: 0.07345478236675262
epoch: 3, step: 100, loss: 0.05372733622789383
epoch: 3, step: 200, loss: 0.09181215614080429
epoch: 4, step: 0, loss: 0.057906899601221085
epoch: 4, step: 100, loss: 0.08990530669689178
epoch: 4, step: 200, loss: 0.07005158811807632
epoch: 5, step: 0, loss: 0.04683231934905052
epoch: 5, step: 100, loss: 0.053118180483579636
epoch: 5, step: 200, loss: 0.04486418142914772
epoch: 6, step: 0, loss: 0.051417771726846695
epoch: 6, step: 100, loss: 0.06038067117333412
epoch: 6, step: 200, loss: 0.030528919771313667
epoch: 7, step: 0, loss: 0

In [None]:
# Инференс LSTM
%%time
phrase = 'He ran quickly after the red bus and caught it'
words = phrase.split(' ')
tokens = [dataset.word_vocab[w] for w in words]

start = datetime.datetime.now()
with torch.no_grad():
    model.eval()
    predict = model(torch.tensor(tokens).unsqueeze(0).to(device)) # 1 x T x N_classes
    labels = torch.argmax(predict, dim=-1).squeeze().cpu().detach().tolist()
    end = datetime.datetime.now() - start

target_labels = list(dataset.target_vocab.keys())
print([target_labels[l] for l in labels])
print(end)

['PRON', 'VERB', 'ADV', 'ADP', 'DET', 'ADJ', 'NOUN', 'CCONJ', 'VERB', 'PRON']
0:00:00.004445
CPU times: user 3.18 ms, sys: 0 ns, total: 3.18 ms
Wall time: 4.76 ms


RNN

In [None]:
class RNN(nn.Module):
    """Token tagger: word embedding -> single-layer RNN -> linear classifier.

    Args:
        vocab_size: number of rows in the word-embedding table.
        emb_dim: size of each word embedding.
        hidden_dim: size of the RNN hidden state.
        n_classes: number of output scores per token.
    """

    def __init__(self, vocab_size, emb_dim, hidden_dim, n_classes):
        super().__init__()
        self.word_emb = nn.Embedding(num_embeddings=vocab_size,
                                     embedding_dim=emb_dim)
        self.rnn = nn.RNN(input_size=emb_dim,
                          hidden_size=hidden_dim,
                          batch_first=True)
        self.clf = nn.Linear(in_features=hidden_dim, out_features=n_classes)
        self.do = nn.Dropout(p=0.1)

    def forward(self, x):
        """Return per-token class scores (B x T x N_classes) for word ids x."""
        embedded = self.word_emb(x)  # B x T x Emb_dim
        # Only the per-step outputs are used; the final hidden state is dropped.
        outputs, _final_state = self.rnn(embedded)  # outputs: B x T x Hid
        regularised = self.do(outputs)
        return self.clf(regularised)  # B x T x N_classes

In [None]:
model = RNN(vocab_size, emb_dim, hidden, n_classes).to(device)
model.train()
optim = torch.optim.Adam(model.parameters(), lr=0.001)
# Target id 0 is the '<pad>' label that collate_fn uses to pad batches;
# ignoring it keeps padded positions out of the averaged loss.
loss_func = nn.CrossEntropyLoss(ignore_index=0)

In [None]:
#  Обучение RNN
%%time
for epoch in range(n_epochs):
    dataloader = DataLoader(dataset, 
                            batch_size, 
                            shuffle=True, 
                            collate_fn=collate_fn,
                            drop_last = True,
                            )
    
    for i, batch in enumerate(dataloader):
        optim.zero_grad()
        predict = model(batch['data'].to(device))
        loss = loss_func(predict.view(-1, n_classes),
                         batch['target'].to(device).view(-1), 
                         )
        loss.backward()
        optim.step()
        if i % 100 == 0:
            print(f'epoch: {epoch}, step: {i}, loss: {loss.item()}')
   
    torch.save(model.state_dict(), f'./rnn_chkpt_{epoch}.pth')
print(loss.item())

epoch: 0, step: 0, loss: 3.3509182929992676
epoch: 0, step: 100, loss: 0.18653209507465363
epoch: 0, step: 200, loss: 0.20894348621368408
epoch: 1, step: 0, loss: 0.16363611817359924
epoch: 1, step: 100, loss: 0.13605424761772156
epoch: 1, step: 200, loss: 0.1820068061351776
epoch: 2, step: 0, loss: 0.08775459975004196
epoch: 2, step: 100, loss: 0.1355336606502533
epoch: 2, step: 200, loss: 0.09294583648443222
epoch: 3, step: 0, loss: 0.08932837843894958
epoch: 3, step: 100, loss: 0.11126276105642319
epoch: 3, step: 200, loss: 0.11403033137321472
epoch: 4, step: 0, loss: 0.06888501346111298
epoch: 4, step: 100, loss: 0.09709928929805756
epoch: 4, step: 200, loss: 0.07743091881275177
epoch: 5, step: 0, loss: 0.04977721348404884
epoch: 5, step: 100, loss: 0.0803999975323677
epoch: 5, step: 200, loss: 0.07250642031431198
epoch: 6, step: 0, loss: 0.02254757471382618
epoch: 6, step: 100, loss: 0.06630387157201767
epoch: 6, step: 200, loss: 0.040526919066905975
epoch: 7, step: 0, loss: 0.034

In [None]:
# Инференс RNN
%%time
phrase = 'He ran quickly after the red bus and caught it'
words = phrase.split(' ')
tokens = [dataset.word_vocab[w] for w in words]

start = datetime.datetime.now()
with torch.no_grad():
    model.eval()
    predict = model(torch.tensor(tokens).unsqueeze(0).to(device)) # 1 x T x N_classes
    labels = torch.argmax(predict, dim=-1).squeeze().cpu().detach().tolist()
    end = datetime.datetime.now() - start

target_labels = list(dataset.target_vocab.keys())
print([target_labels[l] for l in labels])
print(end)

['PRON', 'VERB', 'ADV', 'ADP', 'DET', 'ADJ', 'NOUN', 'CCONJ', 'VERB', 'PRON']
0:00:00.003450
CPU times: user 2.59 ms, sys: 0 ns, total: 2.59 ms
Wall time: 3.67 ms


In [None]:
class GRU(nn.Module):
    """Token tagger: word embedding -> single-layer GRU -> linear classifier.

    Args:
        vocab_size: number of rows in the word-embedding table.
        emb_dim: size of each word embedding.
        hidden_dim: size of the GRU hidden state.
        n_classes: number of output scores per token.
    """

    def __init__(self, vocab_size, emb_dim, hidden_dim, n_classes):
        super().__init__()
        self.word_emb = nn.Embedding(num_embeddings=vocab_size,
                                     embedding_dim=emb_dim)
        self.rnn = nn.GRU(input_size=emb_dim,
                          hidden_size=hidden_dim,
                          batch_first=True)
        self.clf = nn.Linear(in_features=hidden_dim, out_features=n_classes)
        self.do = nn.Dropout(p=0.1)

    def forward(self, x):
        """Return per-token class scores (B x T x N_classes) for word ids x."""
        embedded = self.word_emb(x)  # B x T x Emb_dim
        # Only the per-step outputs are used; the final hidden state is dropped.
        outputs, _final_state = self.rnn(embedded)  # outputs: B x T x Hid
        regularised = self.do(outputs)
        return self.clf(regularised)  # B x T x N_classes

In [None]:
model = GRU(vocab_size, emb_dim, hidden, n_classes).to(device)
model.train()
optim = torch.optim.Adam(model.parameters(), lr=0.001)
# Target id 0 is the '<pad>' label that collate_fn uses to pad batches;
# ignoring it keeps padded positions out of the averaged loss.
loss_func = nn.CrossEntropyLoss(ignore_index=0)

In [None]:
%%time
#  Обучение GRU
for epoch in range(n_epochs):
    dataloader = DataLoader(dataset, 
                            batch_size, 
                            shuffle=True, 
                            collate_fn=collate_fn,
                            drop_last = True,
                            )
    for i, batch in enumerate(dataloader):
        optim.zero_grad()

        predict = model(batch['data'].to(device))
        loss = loss_func(predict.view(-1, n_classes),
                         batch['target'].to(device).view(-1), 
                         )
        loss.backward()
        optim.step()
        if i % 100 == 0:
            print(f'epoch: {epoch}, step: {i}, loss: {loss.item()}')
   
    torch.save(model.state_dict(), f'./rnn_chkpt_{epoch}.pth')
print(loss.item())

epoch: 0, step: 0, loss: 3.2314696311950684
epoch: 0, step: 100, loss: 0.15046754479408264
epoch: 0, step: 200, loss: 0.13895779848098755
epoch: 1, step: 0, loss: 0.215590700507164
epoch: 1, step: 100, loss: 0.15566414594650269
epoch: 1, step: 200, loss: 0.15369762480258942
epoch: 2, step: 0, loss: 0.10072372108697891
epoch: 2, step: 100, loss: 0.13435181975364685
epoch: 2, step: 200, loss: 0.11630139499902725
epoch: 3, step: 0, loss: 0.06856875121593475
epoch: 3, step: 100, loss: 0.09352385252714157
epoch: 3, step: 200, loss: 0.1022975817322731
epoch: 4, step: 0, loss: 0.07787217199802399
epoch: 4, step: 100, loss: 0.04826611652970314
epoch: 4, step: 200, loss: 0.07133965194225311
epoch: 5, step: 0, loss: 0.0549222007393837
epoch: 5, step: 100, loss: 0.06577060371637344
epoch: 5, step: 200, loss: 0.05282406881451607
epoch: 6, step: 0, loss: 0.061561696231365204
epoch: 6, step: 100, loss: 0.04331716522574425
epoch: 6, step: 200, loss: 0.05756833031773567
epoch: 7, step: 0, loss: 0.0411

In [None]:
# Инференс GRU
%%time
phrase = 'He ran quickly after the red bus and caught it'
words = phrase.split(' ')
tokens = [dataset.word_vocab[w] for w in words]

start = datetime.datetime.now()
with torch.no_grad():
    model.eval()
    predict = model(torch.tensor(tokens).unsqueeze(0).to(device)) # 1 x T x N_classes
    labels = torch.argmax(predict, dim=-1).squeeze().cpu().detach().tolist()
    end = datetime.datetime.now() - start

target_labels = list(dataset.target_vocab.keys())
print([target_labels[l] for l in labels])
print(end)

['PRON', 'VERB', 'ADV', 'ADP', 'DET', 'ADJ', 'NOUN', 'CCONJ', 'VERB', 'PRON']
0:00:00.001987
CPU times: user 2.75 ms, sys: 30 µs, total: 2.78 ms
Wall time: 2.18 ms


По точности на обучающей выборке лучшие показатели у GRU, меньше всего времени на обучение требуется RNN, наименьшее время инференса — у GRU.