# Домашнее задание 2

### Описание

В вашем распоряжении датасет с русскоязычными отзывами о мобильных телефонах с выставленным рейтингом от 1 до 5.
Ключевая задача – обучить любую модель регрессии (или классификации, если решите таким путём пойти) из пакетов scikit, XGBoost, LightGBM, CatBoost.


Необходимая метрика:

1. Со звёздочкой (дополнительный балл) – MAE <= 0.5
2. Минимальное допустимое значение – МАЕ <= 1.0

### Что необходимо сделать

1. Откройте датасет
2. Разделите на обучение и тест
3. Осуществите лемматизацию с помощью любого из озвученных на занятии инструментов 
4. Обучение одну или несколько моделей машинного обучения на разных представлениях данных
5. Валидируйте модель. Если модель соответствует условиям метрик, то работа завершена. В ином случае, экспериментируйте, начиная с пункта 7. 
6. По всем попыткам обучить качественную модель пишите свои выводы и замечания, почему так получилось.


## 0. Импорт библиотк, определение констант

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import re
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')
stopwords = set(stopwords.words('english'))

import torch
from torch import nn
from torch.optim import Adam
from torch.utils.data import TensorDataset, DataLoader

import torch.nn.functional as F
import torch.optim as optim

from sklearn.metrics import classification_report, confusion_matrix

import os
from tqdm import tqdm
tqdm.pandas()
from collections import Counter

[nltk_data] Downloading package stopwords to /home/tiv/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /home/tiv/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /home/tiv/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Загрузка и обработка данных

In [2]:
if os.path.exists("data/data_lemma_cleared.csv"):
    df = pd.read_csv("data/data_lemma_cleared.csv", engine='python')

df.head()   

Unnamed: 0,Review,Rating,lemma
0,3d touch просто восхитительная вещь заряд дер...,5.0,3d touch просто восхитительный вещь заряд держ...
1,отключается при температуре близкой к нулю не...,4.0,отключаться температура близкий нуль непонятно...
2,в apple окончательно решили не заморачиваться ...,3.0,apple окончательно решить не заморачиваться де...
3,постарался наиболее ёмко и коротко описать все...,4.0,постараться наиболее ёмко коротко описать всё ...
4,достойный телефон пользоваться одно удовольст...,5.0,достойный телефон пользоваться удовольствие


In [3]:
df = df.drop('Review', axis=1)
df = df.dropna()
df.columns = ['label', 'review']

### Оставлю только небольшой кусок данных на время разработки модели

In [4]:
#df = df[:10000]

In [5]:
reviews = df.review.values
words = ' '.join(reviews)
words = words.split()

print(len(words))
words[:10]

14626103


['3d',
 'touch',
 'просто',
 'восхитительный',
 'вещь',
 'заряд',
 'держать',
 'целый',
 'день',
 'розовый']

In [6]:
counter = Counter(words)
vocab = sorted(counter, key=counter.get, reverse=True)
int2word = dict(enumerate(vocab, 1))
int2word[0] = '<PAD>'
word2int = {word: id for id, word in int2word.items()}

In [7]:
len(word2int)

220502

In [8]:
reviews_enc = []

for review in tqdm(reviews):
    reviews_enc += [[]]
    
    for word in review.split():
        reviews_enc[-1].append(word2int[word])

for i in range(5):
    print(reviews_enc[i][:5])

100%|██████████| 319791/319791 [00:04<00:00, 77146.26it/s]

[1396, 1639, 19, 5984, 395]
[1005, 2310, 813, 2561, 1349]
[587, 1964, 132, 1, 2138]
[1819, 2905, 31073, 4916, 788]
[223, 2, 17, 641]





In [9]:
def pad_features(reviews, pad_id, seq_length=128):
    features = np.full((len(reviews), 
                        seq_length), 
                       pad_id, 
                       dtype=int)

    for i, row in enumerate(reviews):
        features[i, :len(row)] = np.array(row)[:seq_length]

    return features

seq_length = 256
features = pad_features(reviews_enc, 
                        pad_id=word2int['<PAD>'], 
                        seq_length=seq_length)

assert len(features) == len(reviews_enc)
assert len(features[0]) == seq_length

features.shape

(319791, 256)

In [10]:
labels = df.label.to_numpy()
labels

array([5., 4., 3., ..., 5., 5., 5.])

In [11]:
train_size = .7
val_size = .5

split_id = int(len(features) * train_size)
train_x, remain_x = features[:split_id], features[split_id:]
train_y, remain_y = labels[:split_id], labels[split_id:]

split_val_id = int(len(remain_x) * val_size)
val_x, test_x = remain_x[:split_val_id], remain_x[split_val_id:]
val_y, test_y = remain_y[:split_val_id], remain_y[split_val_id:]

print('Feature Shapes:')
print('===============')
print('Train set: {}'.format(train_x.shape))
print('Validation set: {}'.format(val_x.shape))
print('Test set: {}'.format(test_x.shape))

Feature Shapes:
Train set: (223853, 256)
Validation set: (47969, 256)
Test set: (47969, 256)


In [12]:
batch_size  = 64

trainset = TensorDataset(torch.from_numpy(train_x), 
                         torch.from_numpy(train_y))


validset = TensorDataset(torch.from_numpy(val_x), 
                         torch.from_numpy(val_y))

testset = TensorDataset(torch.from_numpy(test_x), 
                        torch.from_numpy(test_y))

train_iterator = DataLoader(trainset, 
                            shuffle=True, 
                            batch_size=batch_size)

valid_iterator = DataLoader(validset, 
                            shuffle=True, 
                            batch_size=batch_size)

test_iterator = DataLoader(testset, 
                           shuffle=True, 
                           batch_size=batch_size)


## CNN

### Функция подсчета accuracy

In [None]:
def binary_accuracy(preds, y):
    rounded_preds = torch.round(F.sigmoid(preds))
    correct = (rounded_preds == y).float()
    acc = correct.sum() / len(correct)
    return acc

In [None]:
def mae(preds, y):
    rounded_preds = torch.round(preds)
    error = torch.mean(torch.abs(rounded_preds - y).float())
    return error

### Функция обучения сети

In [None]:

def train_func(model, iterator, optimizer, criterion):
    epoch_loss = 0
    epoch_acc = 0

    model.train()
    model.cuda()
    
    for batch in iterator:
        optimizer.zero_grad()
        
        predictions = model(batch[0].T.cuda()).squeeze(1)
        
        loss = criterion(predictions.float(), 
                          batch[1].float().cuda())
        
        acc = mae(predictions.float(), 
                              batch[1].float().cuda())
        
        loss.backward()
        optimizer.step()

        epoch_loss += loss
        epoch_acc += acc

    return epoch_loss / len(iterator), epoch_acc / len(iterator)


In [None]:
def evaluate_func(model, iterator, criterion):
    epoch_loss = 0
    epoch_acc = 0

    model.eval()

    with torch.no_grad():
        for batch in iterator:
            predictions = model(batch[0].T.cuda()).squeeze(1)
            
            loss = criterion(predictions.float(), 
                              batch[1].float().cuda())
            
            acc = mae(predictions.float(), 
                                  batch[1].float().cuda())
            
            epoch_loss += loss
            epoch_acc += acc

    return epoch_loss / len(iterator), epoch_acc / len(iterator)

### Архитектура сети

In [None]:
import torch.nn as nn

class CNN(nn.Module):
    def __init__(self, 
                 vocab_size,
                 embedding_dim, 
                 n_filters, 
                 filter_sizes, 
                 output_dim, 
                 dropout):
        
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, 
                                      embedding_dim)
        
        self.conv_0 = nn.Conv2d(in_channels=1, 
                                out_channels=n_filters, 
                                kernel_size=(filter_sizes[0], 
                                             embedding_dim))
        
        self.conv_1 = nn.Conv2d(in_channels=1, 
                                out_channels=n_filters, 
                                kernel_size=(filter_sizes[1], 
                                             embedding_dim))
        
        self.conv_2 = nn.Conv2d(in_channels=1, 
                                out_channels=n_filters, 
                                kernel_size=(filter_sizes[2], 
                                             embedding_dim))
        
        self.fc = nn.Linear(len(filter_sizes) * n_filters, 
                            output_dim)
        
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        #x = [sent len, batch size]
        x = x.permute(1, 0)

        #x = [batch size, sent len]
        embedded = self.embedding(x)

        #embedded = [batch size, sent len, emb dim]
        embedded = embedded.unsqueeze(1)

        #embedded = [batch size, 1, sent len, emb dim]
        conved_0 = F.relu(self.conv_0(embedded).squeeze(3))
        conved_1 = F.relu(self.conv_1(embedded).squeeze(3))
        conved_2 = F.relu(self.conv_2(embedded).squeeze(3))

        #conv_n = [batch size, n_filters, sent len - filter_sizes[n]]
        pooled_0 = F.max_pool1d(conved_0, conved_0.shape[2]).squeeze(2)
        pooled_1 = F.max_pool1d(conved_1, conved_1.shape[2]).squeeze(2)
        pooled_2 = F.max_pool1d(conved_2, conved_2.shape[2]).squeeze(2)

        #pooled_n = [batch size, n_filters]
        cat = self.dropout(torch.cat((pooled_0, pooled_1, pooled_2), dim=1))

        #cat = [batch size, n_filters * len(filter_sizes)]
        return self.fc(cat)
    

In [None]:

INPUT_DIM = len(word2int)
EMBEDDING_DIM = 100
N_FILTERS = 100
FILTER_SIZES = [3,4,5]
OUTPUT_DIM = 1
DROPOUT = 0.5

model = CNN(INPUT_DIM, 
            EMBEDDING_DIM, 
            N_FILTERS, 
            FILTER_SIZES, 
            OUTPUT_DIM, 
            DROPOUT)

In [None]:

optimizer = optim.Adam(model.parameters())
criterion = torch.nn.MSELoss()

model = model.cuda()

In [None]:
model

CNN(
  (embedding): Embedding(220502, 100)
  (conv_0): Conv2d(1, 100, kernel_size=(3, 100), stride=(1, 1))
  (conv_1): Conv2d(1, 100, kernel_size=(4, 100), stride=(1, 1))
  (conv_2): Conv2d(1, 100, kernel_size=(5, 100), stride=(1, 1))
  (fc): Linear(in_features=300, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [None]:
N_EPOCHS = 10

for epoch in range(N_EPOCHS):
    train_loss, train_acc = train_func(model, 
                                       train_iterator, 
                                       optimizer, 
                                       criterion)
    
    valid_loss, valid_acc = evaluate_func(model, 
                                          valid_iterator, 
                                          criterion)
    
    train_msg = f'Epoch: {epoch+1:02}, '
    train_msg += f'Train Loss: {train_loss:.3f}, '
    train_msg += f'Train Acc: {train_acc:.2f}, '
    train_msg += f'Val. Loss: {valid_loss:.3f}, '
    train_msg += f'Val. Acc: {valid_acc:.2f}'
    
    print(train_msg)

Epoch: 01, Train Loss: 0.590, Train Acc: 53.01%, Val. Loss: 0.839, Val. Acc: 64.69%
Epoch: 02, Train Loss: 0.538, Train Acc: 49.64%, Val. Loss: 0.841, Val. Acc: 64.30%
Epoch: 03, Train Loss: 0.497, Train Acc: 46.69%, Val. Loss: 0.856, Val. Acc: 63.48%
Epoch: 04, Train Loss: 0.461, Train Acc: 44.12%, Val. Loss: 0.885, Val. Acc: 67.24%
Epoch: 05, Train Loss: 0.437, Train Acc: 42.05%, Val. Loss: 0.862, Val. Acc: 63.94%
Epoch: 06, Train Loss: 0.416, Train Acc: 40.54%, Val. Loss: 0.858, Val. Acc: 63.84%
Epoch: 07, Train Loss: 0.399, Train Acc: 39.03%, Val. Loss: 0.849, Val. Acc: 63.57%
Epoch: 08, Train Loss: 0.384, Train Acc: 38.00%, Val. Loss: 0.869, Val. Acc: 63.31%
Epoch: 09, Train Loss: 0.373, Train Acc: 36.99%, Val. Loss: 0.882, Val. Acc: 64.20%
Epoch: 10, Train Loss: 0.361, Train Acc: 35.88%, Val. Loss: 0.869, Val. Acc: 63.12%


In [None]:

test_loss , test_acc = evaluate_func(model, 
                                     test_iterator, 
                                     criterion)

print(f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc:.2f}')


Test Loss: 0.908, Test Acc: 0.67


## RNN

In [13]:
dataiter = iter(train_iterator)
x, y = next(dataiter)

print('Sample batch size: ', x.size()) 
print('Sample batch input: \n', x)
print()
print('Sample label size: ', y.size())
print('Sample label input: \n', y)

Sample batch size:  torch.Size([64, 256])
Sample batch input: 
 tensor([[2419,   16,    2,  ...,    0,    0,    0],
        [  41, 1713,   33,  ...,    0,    0,    0],
        [ 349,   81,  876,  ...,    0,    0,    0],
        ...,
        [  62,   17,  108,  ...,    0,    0,    0],
        [   2,   22,  783,  ...,    0,    0,    0],
        [  36,   77,   11,  ...,    0,    0,    0]])

Sample label size:  torch.Size([64])
Sample label input: 
 tensor([5., 3., 3., 1., 1., 5., 5., 4., 5., 5., 4., 5., 3., 5., 5., 5., 4., 5.,
        5., 3., 5., 3., 4., 5., 1., 2., 2., 4., 1., 2., 2., 2., 5., 5., 4., 5.,
        3., 5., 5., 5., 4., 3., 5., 5., 2., 4., 1., 5., 5., 5., 5., 4., 4., 4.,
        5., 3., 4., 5., 4., 1., 5., 5., 5., 5.], dtype=torch.float64)


### Моделирование

In [46]:
class SentimentModel(nn.Module):
    def __init__(self, vocab_size, output_size, hidden_size=128, 
                 embedding_size=400, n_layers=2, dropout=0.2):
        
        super(SentimentModel, self).__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_size)

        self.lstm = nn.LSTM(embedding_size, hidden_size, n_layers, 
                            dropout=dropout, batch_first=True)

        self.dropout = nn.Dropout(0.3)

        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()
        
        self.fc2 = nn.Linear(embedding_size, hidden_size)

    def forward(self, x):
        
        # convert feature to long
        x = x.long()

        # map input to vector
        x = self.embedding(x)
        
        #x = self.fc2(x)
        
        # pass forward to lstm
        x, _ =  self.lstm(x)

        # get last sequence output
        x = x[:, -1, :]

        # apply dropout and fully connected layer
        x = self.dropout(x)
        x = self.fc(x)
        
        # sigmoid
        # o = self.sigmoid(o)
        
        # o = o * 5

        return x

In [47]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#device = 'cpu'
print(device)

cuda


In [48]:
vocab_size = len(word2int)
output_size = 1
embedding_size = 256
hidden_size = 512
n_layers = 2
dropout=0.25

model = SentimentModel(vocab_size, 
                       output_size, 
                       hidden_size, 
                       embedding_size, 
                       n_layers, 
                       dropout)
print(model)

SentimentModel(
  (embedding): Embedding(220502, 256)
  (lstm): LSTM(256, 512, num_layers=2, batch_first=True, dropout=0.25)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=512, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (fc2): Linear(in_features=256, out_features=512, bias=True)
)


In [49]:
lr = 0.0001
criterion = torch.nn.MSELoss()
optim = Adam(model.parameters(), lr=lr)
grad_clip = 1
epochs = 5
print_every = 1
history = {
    'train_loss': [],
    'train_acc': [],
    'train_mae': [],
    'val_loss': [],
    'val_acc': [],
    'val_mae': [],
    'epochs': epochs
}
es_limit = 5

In [50]:
epochs = 5

model = SentimentModel(vocab_size, 
                       output_size, 
                       hidden_size, 
                       embedding_size, 
                       n_layers, 
                       dropout)

criterion = torch.nn.MSELoss()
optim = Adam(model.parameters(), lr=lr)
grad_clip = 1
epochs = 5
print_every = 1
history = {
    'train_loss': [],
    'train_acc': [],
    'train_mae': [],
    'val_loss': [],
    'val_acc': [],
    'val_mae': [],
    'epochs': epochs
}
es_limit = 5

model = model.to(device)

epochloop = tqdm(range(epochs), position=0, desc='Training', leave=True)

# early stop trigger
es_trigger = 0
val_loss_min = 1000 #torch.inf

for e in epochloop:

    # Обучение
    
    model.train()

    train_loss = 0
    train_acc = 0
    train_mae = 0
    
    for id_, (feature, target) in enumerate(train_iterator):
        epochloop.set_postfix_str(f'Training batch {id_}/{len(train_iterator)}')

        feature, target = feature.to(device), target.to(device)

        optim.zero_grad()

        out = model(feature)
        #print(out[:5])
        #predicted = torch.tensor([1 if i == True else 0 for i in out > 0.5], device=device)
        #predicted = torch.tensor(torch.round(out), device=device)
        predicted = torch.round(out.squeeze().clone().detach())
        #predicted = out.clone().detach()
        #predicted = torch.tensor(out, device=device)
        #print('-------- OUT')
        #print(out)        
        #print('--------- OUT squeeze')
        #print(out.squeeze(),)       
        #print('PREDICTED')
        #print(predicted)
        #print('TARGET')
        #print(target)
        equals = predicted == target
        acc = torch.mean(equals.type(torch.FloatTensor))
        train_acc += acc.item()
        mae = torch.mean(torch.abs(predicted - target).float())
        train_mae += mae.item()
        
        loss = criterion(out.squeeze(), target.float())
        train_loss += loss.item()
        loss.backward()

        nn.utils.clip_grad_norm_(model.parameters(), grad_clip)

        optim.step()

        del feature, target, predicted

    history['train_loss'].append(train_loss / len(train_iterator))
    history['train_acc'].append(train_acc / len(train_iterator))
    history['train_mae'].append(train_mae / len(train_iterator))

    # Валидация
    model.eval()

    val_loss = 0
    val_acc = 0
    val_mae = 0
    
    with torch.no_grad():
        for id_, (feature, target) in enumerate(valid_iterator):
            epochloop.set_postfix_str(f'Validation batch {id_}/{len(valid_iterator)}')
            
            feature, target = feature.to(device), target.to(device)

            out = model(feature)

            #predicted = torch.tensor([1 if i == True else 0 for i in out > 0.5], device=device)
            
            predicted = torch.round(out.clone().detach())
            #predicted = out.clone().detach()
            equals = predicted == target
            acc = torch.mean(equals.type(torch.FloatTensor))
            val_acc += acc.item()
            mae = torch.mean(torch.abs(predicted - target).float())
            val_mae += mae.item()
            
            loss = criterion(out.squeeze(), target.float())
            val_loss += loss.item()

            del feature, target, predicted

        history['val_loss'].append(val_loss / len(valid_iterator))
        history['val_acc'].append(val_acc / len(valid_iterator))
        history['val_mae'].append(val_mae / len(valid_iterator))
    
    # Возвращаем модель в режим обучения
    # Возвращаем модель в режим обучения
    model.train()

    info_str = f'Val Loss: {val_loss / len(valloader):.3f} '
    info_str += f'| Val mae: {val_mae / len(valloader):.3f}'
    epochloop.set_postfix_str(info_str)

    if (e+1) % print_every == 0:
        info_str = f'Epoch {e+1}/{epochs} | Train Loss: {train_loss / len(trainloader):.3f} '
        info_str += f'Train mae: {train_mae / len(trainloader):.3f} '
        info_str += f'| Val Loss: {val_loss / len(valloader):.3f} '
        info_str += f'Val mae: {val_mae / len(valloader):.3f}'
        
        epochloop.write(info_str)
        epochloop.update()

    if val_loss / len(valloader) <= val_loss_min:
        torch.save(model.state_dict(), './sentiment_lstm.pt')
        val_loss_min = val_loss / len(valloader)
        es_trigger = 0
    else:
        info_str = '[WARNING] Validation loss did not improved ('
        info_str += f'{val_loss_min:.3f} --> {val_loss / len(valloader):.3f})'
        
        epochloop.write(info_str)
        es_trigger += 1

    if es_trigger >= es_limit:
        epochloop.write(f'Early stopped at Epoch-{e+1}')
        history['epochs'] = e+1
        break

Training:  20%|██        | 1/5 [04:04<16:16, 244.20s/it, Val Loss: 50.485 | Val mae: 30.810]

Epoch 1/5 | Train Loss: 51.985 Train mae: 31.729 | Val Loss: 50.485 Val mae: 30.810


Training:  60%|██████    | 3/5 [08:14<05:37, 168.80s/it, Val Loss: 50.484 | Val mae: 30.809]

Epoch 2/5 | Train Loss: 50.583 Train mae: 31.467 | Val Loss: 50.484 Val mae: 30.809


Training:  80%|████████  | 4/5 [12:36<03:25, 205.85s/it, Val Loss: 50.287 | Val mae: 30.806]

Epoch 3/5 | Train Loss: 50.655 Train mae: 31.481 | Val Loss: 50.287 Val mae: 30.806


Training: 6it [16:54, 174.18s/it, Training batch 3/3498]                                    

Epoch 4/5 | Train Loss: 50.469 Train mae: 31.467 | Val Loss: 50.739 Val mae: 30.810


Training: 100%|██████████| 5/5 [21:09<00:00, 253.96s/it, Val Loss: 50.405 | Val mae: 30.812]

Epoch 5/5 | Train Loss: 50.500 Train mae: 31.462 | Val Loss: 50.405 Val mae: 30.812





In [19]:
trainloader = train_iterator
valloader = valid_iterator


In [20]:
def rnn_mogel_explore(learn_rate=0.0001, epoch=8, layers=2, drop=0.25):

    vocab_size = len(word2int)
    output_size = 1
    embedding_size = 256
    hidden_size = 512
    n_layers = layers
    dropout = drop
    
    model = SentimentModel(vocab_size, 
                           output_size, 
                           hidden_size, 
                           embedding_size, 
                           n_layers, 
                           dropout)
    print(model)
    
    lr = learn_rate
    criterion = nn.MSELoss()
    optim = Adam(model.parameters(), lr=lr)
    grad_clip = 5
    epochs = epoch
    print_every = 1
    history = {
        'train_loss': [],
        'train_mae': [],
        'val_loss': [],
        'val_mae': [],
        'epochs': epochs
    }
    es_limit = 5
    
    model = model.to(device)
    
    epochloop = tqdm(range(epochs), position=0, desc='Training', leave=True)
    
    # early stop trigger
    es_trigger = 0
    #val_loss_min = torch.inf
    val_loss_min = 1000
    
    for e in epochloop:
    
        # Обучение
        
        model.train()
    
        train_loss = 0
        train_mae = 0
    
        for id_, (feature, target) in enumerate(trainloader):
            epochloop.set_postfix_str(f'Training batch {id_}/{len(trainloader)}')
    
            feature, target = feature.to(device), target.to(device)
    
            optim.zero_grad()
    
            out = model(feature)
            #print(out[:5])
            #predicted = torch.tensor([1 if i == True else 0 for i in out > 0.5], device=device)
            predicted = torch.tensor(out, device=device)
            #print(predicted[:5])
            #print(target[:5])
            #equals = predicted == target
            #acc = torch.mean(equals.type(torch.FloatTensor))
            mae = torch.mean(torch.abs(target - predicted))
            train_mae += mae.item()
    
            loss = criterion(out.squeeze(), target.float())
            train_loss += loss.item()
            loss.backward()
    
            nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
    
            optim.step()
    
            del feature, target, predicted
    
        history['train_loss'].append(train_loss / len(trainloader))
        history['train_mae'].append(train_mae / len(trainloader))
    
        # Валидация
        model.eval()
    
        val_loss = 0
        val_mae = 0
    
        with torch.no_grad():
            for id_, (feature, target) in enumerate(valloader):
                epochloop.set_postfix_str(f'Validation batch {id_}/{len(valloader)}')
                
                feature, target = feature.to(device), target.to(device)
    
                out = model(feature)
                #print(out[:5])
    
                #predicted = torch.tensor([1 if i == True else 0 for i in out > 0.5], device=device)
                predicted = torch.tensor(out, device=device)
                #equals = predicted == target
                #acc = torch.mean(equals.type(torch.FloatTensor))
                mae = torch.mean(torch.abs(target - predicted))
                val_mae += mae.item()
    
                loss = criterion(out.squeeze(), target.float())
                val_loss += loss.item()
    
                del feature, target, predicted
    
            history['val_loss'].append(val_loss / len(valloader))
            history['val_mae'].append(val_mae / len(valloader))
        
        # Возвращаем модель в режим обучения
        model.train()
    
        info_str = f'Val Loss: {val_loss / len(valloader):.3f} '
        info_str += f'| Val mae: {val_mae / len(valloader):.3f}'
        epochloop.set_postfix_str(info_str)
    
        if (e+1) % print_every == 0:
            info_str = f'Epoch {e+1}/{epochs} | Train Loss: {train_loss / len(trainloader):.3f} '
            info_str += f'Train mae: {train_mae / len(trainloader):.3f} '
            info_str += f'| Val Loss: {val_loss / len(valloader):.3f} '
            info_str += f'Val mae: {val_mae / len(valloader):.3f}'
            
            epochloop.write(info_str)
            epochloop.update()

        if val_loss / len(valloader) <= val_loss_min:
            torch.save(model.state_dict(), './sentiment_lstm.pt')
            val_loss_min = val_loss / len(valloader)
            es_trigger = 0
        else:
            info_str = '[WARNING] Validation loss did not improved ('
            info_str += f'{val_loss_min:.3f} --> {val_loss / len(valloader):.3f})'
            
            epochloop.write(info_str)
            es_trigger += 1
    
        if es_trigger >= es_limit:
            epochloop.write(f'Early stopped at Epoch-{e+1}')
            history['epochs'] = e+1
            break
            
    plt.figure(figsize=(6, 6))
    plt.plot(range(history['epochs']), history['train_mae'], label='Train mae')
    plt.plot(range(history['epochs']), history['val_mae'], label='Val mae')
    plt.legend()
    plt.show()
    
    plt.figure(figsize=(6, 6))
    plt.plot(range(history['epochs']), history['train_loss'], label='Train Loss')
    plt.plot(range(history['epochs']), history['val_loss'], label='Val Loss')
    plt.legend()
    plt.show()
    
    model.eval()
    
    # metrics
    test_loss = 0
    test_mae = 0
    
    all_target = []
    all_predicted = []
    
    testloop = tqdm(testloader, leave=True, desc='Inference')
    with torch.no_grad():
        for feature, target in testloop:
            feature, target = feature.to(device), target.to(device)
    
            out = model(feature)
            
            predicted = torch.tensor(out, device=device)
            mae = torch.mean(torch.abs(target - predicted))
            test_mae += mae.item()
    
            loss = criterion(out.squeeze(), target.float())
            test_loss += loss.item()
    
            all_target.extend(target.cpu().numpy())
            all_predicted.extend(predicted.cpu().numpy())
    
    
    print(f'mae: {test_mae/len(testloader):.4f}, Loss: {test_loss/len(testloader):.4f}')


In [21]:
rnn_mogel_explore()

SentimentModel(
  (embedding): Embedding(29993, 256)
  (lstm): LSTM(256, 512, num_layers=2, batch_first=True, dropout=0.25)
  (dropout): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=512, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (fc2): Linear(in_features=256, out_features=512, bias=True)
)


  predicted = torch.tensor(out, device=device)
  predicted = torch.tensor(out, device=device)
Training:  25%|██▌       | 2/8 [00:06<00:36,  6.04s/it, Training batch 3/110]            

Epoch 1/8 | Train Loss: 3.015 Train mae: 1.335 | Val Loss: 1.549 Val mae: 0.992


Training:  50%|█████     | 4/8 [00:12<00:15,  3.84s/it, Training batch 4/110]            

Epoch 2/8 | Train Loss: 1.676 Train mae: 1.053 | Val Loss: 1.558 Val mae: 1.006


Training:  25%|██▌       | 2/8 [00:14<00:44,  7.39s/it, Training batch 50/110]


KeyboardInterrupt: 