### 7. Рекурентные сети для обработки последовательностей
Попробуйте обучить нейронную сеть GRU/LSTM для предсказания сентимента сообщений с твитера на примере https://www.kaggle.com/datasets/arkhoshghalb/twitter-sentiment-analysis-hatred-speech

Опишите, какой результат вы получили? Что помогло вам улучшить ее точность?

In [2]:
import torch
import re
import pandas as pd
import numpy as np
import nltk

import torch.nn as nn
import torch.nn.functional as F

nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

from torch.utils.data import DataLoader, Dataset
from string import punctuation
from textblob import TextBlob, Word
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
from itertools import islice
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

nltk.download("punkt")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Инна\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Инна\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\Инна\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Инна\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

Загрузим данные и посмотрим на них

In [3]:
df_train = pd.read_csv('train.csv')
df_train.head()

Unnamed: 0,id,label,tweet
0,1,0,@user when a father is dysfunctional and is s...
1,2,0,@user @user thanks for #lyft credit i can't us...
2,3,0,bihday your majesty
3,4,0,#model i love u take with u all the time in ...
4,5,0,factsguide: society now #motivation


In [4]:
df_train['label'].value_counts()

0    29720
1     2242
Name: label, dtype: int64

Зададим ряд гиперпараметров, которые будут использоваться в дальнейшем процессе обучения.

In [5]:
max_words = 1500
max_len = 15
num_classes = 1
batch_size = 512

Сплитуем данные на трейн и тест

In [6]:
X_train, X_val, y_train, y_val = train_test_split(df_train['tweet'], 
                                                  df_train['label'], 
                                                  test_size=0.3, 
                                                  random_state=42, 
                                                  stratify=df_train['label'])

Предобработка данных

In [7]:
sw = set(stopwords.words("english"))
# Обработаем сет на наличие стоп-слов
# но часто встречается как текстовое представление символа - &amp; 
sw.add('amp')
# Добавим user, так как в данном датасете это является обезличенным 
# упоминанием пользователя в твите
sw.add('user')
sw

{'a',
 'about',
 'above',
 'after',
 'again',
 'against',
 'ain',
 'all',
 'am',
 'amp',
 'an',
 'and',
 'any',
 'are',
 'aren',
 "aren't",
 'as',
 'at',
 'be',
 'because',
 'been',
 'before',
 'being',
 'below',
 'between',
 'both',
 'but',
 'by',
 'can',
 'couldn',
 "couldn't",
 'd',
 'did',
 'didn',
 "didn't",
 'do',
 'does',
 'doesn',
 "doesn't",
 'doing',
 'don',
 "don't",
 'down',
 'during',
 'each',
 'few',
 'for',
 'from',
 'further',
 'had',
 'hadn',
 "hadn't",
 'has',
 'hasn',
 "hasn't",
 'have',
 'haven',
 "haven't",
 'having',
 'he',
 'her',
 'here',
 'hers',
 'herself',
 'him',
 'himself',
 'his',
 'how',
 'i',
 'if',
 'in',
 'into',
 'is',
 'isn',
 "isn't",
 'it',
 "it's",
 'its',
 'itself',
 'just',
 'll',
 'm',
 'ma',
 'me',
 'mightn',
 "mightn't",
 'more',
 'most',
 'mustn',
 "mustn't",
 'my',
 'myself',
 'needn',
 "needn't",
 'no',
 'nor',
 'not',
 'now',
 'o',
 'of',
 'off',
 'on',
 'once',
 'only',
 'or',
 'other',
 'our',
 'ours',
 'ourselves',
 'out',
 'over',
 'o

In [8]:
#Список знаков и спецсимволов
puncts = set(punctuation)
puncts

{'!',
 '"',
 '#',
 '$',
 '%',
 '&',
 "'",
 '(',
 ')',
 '*',
 '+',
 ',',
 '-',
 '.',
 '/',
 ':',
 ';',
 '<',
 '=',
 '>',
 '?',
 '@',
 '[',
 '\\',
 ']',
 '^',
 '_',
 '`',
 '{',
 '|',
 '}',
 '~'}

In [9]:
def preprocess_text(txt):
    txt = str(txt)
    # уберем нечитаемые символы типа  ð\x9f¤\x97
    txt = "".join([c for c in txt if ord(c) < 128])
    txt = "".join(c for c in txt if c not in puncts)
    txt = txt.lower()
    # преобразуем отрицания
    txt = re.sub("not\s", "not", txt)
    txt = re.sub("no\s", "no", txt)
    # будем приводить формы к глаголам
    txt = [Word(word).lemmatize('v') for word in txt.split() if word not in sw]
    return " ".join(txt)

In [10]:
X_train.iloc[:10].values #необработанный текст

array(['happy bihday to my brother man. needed this mixtape like we need boos. have a good one sach   @user ',
       '  lang to sta the week right :)  #happiness #smile ',
       'note it meditate on it work on it ,but most impoantly trust god for it #icantwaitfohedayhisplansformylifeunfold #grateful  ',
       '@user listening to you this wet mon, ahead of #leedsmillenium gig next month   ð\x9f\x98\x86ð\x9f\x91\x8dð\x9f\x98\x8d #music #ace ',
       '@user @user agreed.. the same is true for  and .. they are overused terms, and as a result, are fast becominâ\x80¦',
       'very exciting! #dubllife #recycle ',
       '#bad times #drink   #nobev ',
       '#ootd #converse #denim #tshi  #shopping  #like4like #l4l #f4f #instagood  ',
       '  #fathersday to the man of my dreams! you sacrificed bachelorhood for a ready-made familyâ\x80¦ ',
       '  #pougalday #pay #saturday #fresh #new #haircut &amp; new #red #car in #style #chillingâ\x80¦ '],
      dtype=object)

In [11]:
X_train.iloc[:10].apply(preprocess_text).values #обработанный текст

array(['happy bihday brother man need mixtape like need boo good one sach',
       'lang sta week right happiness smile',
       'note meditate work impoantly trust god icantwaitfohedayhisplansformylifeunfold grateful',
       'listen wet mon ahead leedsmillenium gig next month music ace',
       'agree true overuse term result fast becomin',
       'excite dubllife recycle', 'bad time drink nobev',
       'ootd converse denim tshi shop like4like l4l f4f instagood',
       'fathersday man dream sacrifice bachelorhood readymade family',
       'pougalday pay saturday fresh new haircut new red car style chill'],
      dtype=object)

In [12]:
X_train = X_train.apply(preprocess_text).values
X_val = X_val.apply(preprocess_text).values

Токенизация

In [13]:
train_corpus = " ".join(X_train)
train_corpus = train_corpus.lower()

In [14]:
tokens = word_tokenize(train_corpus)
tokens[:10]

['happy',
 'bihday',
 'brother',
 'man',
 'need',
 'mixtape',
 'like',
 'need',
 'boo',
 'good']

In [15]:
tokens_filtered = [word for word in tokens if word.isalnum()]
dist = FreqDist(tokens_filtered)
tokens_filtered_top = [pair[0] for pair in dist.most_common(max_words-1)]

# Посмотрим на топ 10 слов
tokens_filtered_top[:10]

['love', 'day', 'get', 'happy', 'go', 'time', 'make', 'im', 'u', 'life']

Словарь наиболее частотных слов

In [16]:
def take(n, iterable):
    return list(islice(iterable, n))

vocabulary = {v: k for k, v in dict(enumerate(tokens_filtered_top, 1)).items()}
take(20, vocabulary.items())

[('love', 1),
 ('day', 2),
 ('get', 3),
 ('happy', 4),
 ('go', 5),
 ('time', 6),
 ('make', 7),
 ('im', 8),
 ('u', 9),
 ('life', 10),
 ('like', 11),
 ('today', 12),
 ('new', 13),
 ('father', 14),
 ('see', 15),
 ('positive', 16),
 ('smile', 17),
 ('thankful', 18),
 ('people', 19),
 ('bihday', 20)]

In [17]:
def text_to_sequence(text, maxlen):
    result = []
    tokens = word_tokenize(text.lower())
    tokens_filtered = [word for word in tokens if word.isalnum()]
    for word in tokens_filtered:
        if word in vocabulary:
            result.append(vocabulary[word])

    padding = [0] * (maxlen-len(result))
    return result[-maxlen:] + padding

In [18]:
x_train = np.asarray([text_to_sequence(text, max_len) for text in X_train])
x_val = np.asarray([text_to_sequence(text, max_len) for text in X_val])

In [19]:
x_train[0]

array([  4,  20, 887, 131,  31,  11,  31,  21,  28,   0,   0,   0,   0,
         0,   0])

In [20]:
class DataWrapper(Dataset):
    def __init__(self, data, target, transform=None):
        self.data = torch.from_numpy(data).long()
        self.target = torch.from_numpy(target).long()
        self.transform = transform
        
    def __getitem__(self, index):
        x = self.data[index]
        y = self.target[index]
        
        if self.transform:
            x = self.transform(x)
            
        return x, y
    
    def __len__(self):
        return len(self.data)

In [21]:
train_dataset = DataWrapper(x_train, y_train.values)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = DataWrapper(x_val, y_val.values)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=True)

In [22]:
criterion = nn.BCELoss()

Инициализируем устройство, на котором будем обучать модель

In [23]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

Инициализируем и обучим сеть GRU на данных

In [24]:
n_epochs = [5, 10]
learning_rates = [1e-2, 1e-3]
e_dims = [128, 256]
h_dims = [64, 96]
ths = [0.3, 0.5]
dps = [0.1, 0.2, 0.3]

In [41]:
class GRUFixedLen(nn.Module) :
    def __init__(self, vocab_size, embedding_dim=128, hidden_dim=128, drop_prob=0.1, use_last=True):
        super().__init__()
        self.use_last = use_last
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=2, batch_first=True, dropout=drop_prob)
        self.linear = nn.Linear(hidden_dim, 1)
       # self.dropout = nn.Dropout(drop_prob)
        
    def forward(self, x):
        x = self.embeddings(x)
        #x = self.dropout(x)
        gru_out, ht = self.gru(x)
       
        if self.use_last:
            last_tensor = gru_out[:,-1,:]
        else:
            #use mean
            last_tensor = torch.mean(gru_out[:,:], dim=1)
    
        out = self.linear(last_tensor)
        return torch.sigmoid(out)

In [42]:
%%time
for epochs in n_epochs:
    for lr in learning_rates:
        for embedding_dim in e_dims:
            for hidden_dim in h_dims:
                for th in ths:
                    for dp in dps:
                        
                        print(f'Hyper params: epochs - {epochs}, learning_rate - {lr}, '
                             f'embedding_dim - {embedding_dim}, hidden_dim - {hidden_dim}, '
                             f'threshold_level - {th}, drop_prob - {dp}.')
                        model = GRUFixedLen(vocab_size=max_words, 
                                             embedding_dim=embedding_dim, hidden_dim=hidden_dim, 
                                             drop_prob=dp, use_last=False)
                        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
                        model = model.to(device)
                        model.train()
                        th = th

                        train_loss_history = []
                        test_loss_history = []


                        for epoch in range(epochs):  
                            running_items, running_right = 0.0, 0.0
                            for i, data in enumerate(train_loader, 0):
                                inputs, labels = data[0].to(device), data[1].to(device)

                                # обнуляем градиент
                                optimizer.zero_grad()
                                outputs = model(inputs)

                                loss = criterion(outputs, labels.float().view(-1, 1))
                                loss.backward()
                                optimizer.step()

                                # подсчет ошибки на обучении
                                loss = loss.item()
                                running_items += len(labels)
                                # подсчет метрики на обучении
                                pred_labels = torch.squeeze((outputs > th).int())
                                running_right += (labels == pred_labels).sum()

                            # выводим статистику о процессе обучения
                            model.eval()

                            print(f'Epoch [{epoch + 1}/{epochs}]. ' \
                                    f'Step [{i + 1}/{len(train_loader)}]. ' \
                                    f'Loss: {loss:.3f}. ' \
                                    f'Acc: {running_right / running_items:.3f}', end='. ')
                            running_loss, running_items, running_right = 0.0, 0.0, 0.0
                            train_loss_history.append(loss)

                                # выводим статистику на тестовых данных
                            test_running_right, test_running_total, test_loss = 0.0, 0.0, 0.0
                            for j, data in enumerate(val_loader):
                                test_labels = data[1].to(device)
                                test_outputs = model(data[0].to(device))

                                # подсчет ошибки на тесте
                                test_loss = criterion(test_outputs, test_labels.float().view(-1, 1))
                                # подсчет метрики на тесте
                                test_running_total += len(data[1])
                                pred_test_labels = torch.squeeze((test_outputs > th).int())
                                test_running_right += (test_labels == pred_test_labels).sum()

                            test_loss_history.append(test_loss.item())
                            print(f'Test loss: {test_loss:.3f}.' 
                                  f'Test acc: {test_running_right / test_running_total:.3f}')

                            model.train()

                        print('Finished!')

Hyper params: epochs - 5, learning_rate - 0.01, embedding_dim - 128, hidden_dim - 64, threshold_level - 0.3, drop_prob - 0.1.
Epoch [1/5]. Step [44/44]. Loss: 0.159. Acc: 0.898. Test loss: 0.278.Test acc: 0.938
Epoch [2/5]. Step [44/44]. Loss: 0.154. Acc: 0.952. Test loss: 0.132.Test acc: 0.944
Epoch [3/5]. Step [44/44]. Loss: 0.111. Acc: 0.960. Test loss: 0.003.Test acc: 0.935
Epoch [4/5]. Step [44/44]. Loss: 0.087. Acc: 0.969. Test loss: 0.043.Test acc: 0.938
Epoch [5/5]. Step [44/44]. Loss: 0.066. Acc: 0.976. Test loss: 0.030.Test acc: 0.944
Finished!
Hyper params: epochs - 5, learning_rate - 0.01, embedding_dim - 128, hidden_dim - 64, threshold_level - 0.3, drop_prob - 0.2.
Epoch [1/5]. Step [44/44]. Loss: 0.176. Acc: 0.899. Test loss: 0.065.Test acc: 0.946
Epoch [2/5]. Step [44/44]. Loss: 0.111. Acc: 0.952. Test loss: 0.018.Test acc: 0.947
Epoch [3/5]. Step [44/44]. Loss: 0.140. Acc: 0.961. Test loss: 0.550.Test acc: 0.952
Epoch [4/5]. Step [44/44]. Loss: 0.090. Acc: 0.971. Test l

Epoch [1/5]. Step [44/44]. Loss: 0.254. Acc: 0.924. Test loss: 0.655.Test acc: 0.933
Epoch [2/5]. Step [44/44]. Loss: 0.157. Acc: 0.940. Test loss: 0.035.Test acc: 0.941
Epoch [3/5]. Step [44/44]. Loss: 0.166. Acc: 0.949. Test loss: 0.112.Test acc: 0.948
Epoch [4/5]. Step [44/44]. Loss: 0.117. Acc: 0.956. Test loss: 0.033.Test acc: 0.951
Epoch [5/5]. Step [44/44]. Loss: 0.143. Acc: 0.960. Test loss: 0.726.Test acc: 0.950
Finished!
Hyper params: epochs - 5, learning_rate - 0.001, embedding_dim - 128, hidden_dim - 96, threshold_level - 0.3, drop_prob - 0.1.
Epoch [1/5]. Step [44/44]. Loss: 0.192. Acc: 0.728. Test loss: 0.354.Test acc: 0.934
Epoch [2/5]. Step [44/44]. Loss: 0.121. Acc: 0.939. Test loss: 0.024.Test acc: 0.939
Epoch [3/5]. Step [44/44]. Loss: 0.168. Acc: 0.946. Test loss: 0.048.Test acc: 0.945
Epoch [4/5]. Step [44/44]. Loss: 0.138. Acc: 0.953. Test loss: 0.087.Test acc: 0.947
Epoch [5/5]. Step [44/44]. Loss: 0.099. Acc: 0.957. Test loss: 0.033.Test acc: 0.948
Finished!
Hyp

Epoch [5/5]. Step [44/44]. Loss: 0.101. Acc: 0.964. Test loss: 0.026.Test acc: 0.944
Finished!
Hyper params: epochs - 5, learning_rate - 0.001, embedding_dim - 256, hidden_dim - 96, threshold_level - 0.3, drop_prob - 0.3.
Epoch [1/5]. Step [44/44]. Loss: 0.209. Acc: 0.739. Test loss: 0.537.Test acc: 0.940
Epoch [2/5]. Step [44/44]. Loss: 0.131. Acc: 0.943. Test loss: 0.668.Test acc: 0.940
Epoch [3/5]. Step [44/44]. Loss: 0.131. Acc: 0.952. Test loss: 0.015.Test acc: 0.943
Epoch [4/5]. Step [44/44]. Loss: 0.126. Acc: 0.956. Test loss: 0.027.Test acc: 0.944
Epoch [5/5]. Step [44/44]. Loss: 0.084. Acc: 0.964. Test loss: 0.018.Test acc: 0.940
Finished!
Hyper params: epochs - 5, learning_rate - 0.001, embedding_dim - 256, hidden_dim - 96, threshold_level - 0.5, drop_prob - 0.1.
Epoch [1/5]. Step [44/44]. Loss: 0.181. Acc: 0.896. Test loss: 0.055.Test acc: 0.938
Epoch [2/5]. Step [44/44]. Loss: 0.167. Acc: 0.948. Test loss: 0.037.Test acc: 0.949
Epoch [3/5]. Step [44/44]. Loss: 0.135. Acc: 0

Epoch [1/10]. Step [44/44]. Loss: 0.208. Acc: 0.885. Test loss: 0.062.Test acc: 0.947
Epoch [2/10]. Step [44/44]. Loss: 0.131. Acc: 0.954. Test loss: 0.233.Test acc: 0.948
Epoch [3/10]. Step [44/44]. Loss: 0.070. Acc: 0.964. Test loss: 0.405.Test acc: 0.950
Epoch [4/10]. Step [44/44]. Loss: 0.066. Acc: 0.971. Test loss: 0.004.Test acc: 0.944
Epoch [5/10]. Step [44/44]. Loss: 0.034. Acc: 0.979. Test loss: 0.018.Test acc: 0.939
Epoch [6/10]. Step [44/44]. Loss: 0.046. Acc: 0.984. Test loss: 0.000.Test acc: 0.943
Epoch [7/10]. Step [44/44]. Loss: 0.027. Acc: 0.986. Test loss: 1.492.Test acc: 0.941
Epoch [8/10]. Step [44/44]. Loss: 0.034. Acc: 0.989. Test loss: 0.001.Test acc: 0.940
Epoch [9/10]. Step [44/44]. Loss: 0.037. Acc: 0.989. Test loss: 2.914.Test acc: 0.943
Epoch [10/10]. Step [44/44]. Loss: 0.052. Acc: 0.989. Test loss: 0.000.Test acc: 0.936
Finished!
Hyper params: epochs - 10, learning_rate - 0.01, embedding_dim - 256, hidden_dim - 64, threshold_level - 0.5, drop_prob - 0.1.
Ep

Epoch [5/10]. Step [44/44]. Loss: 0.116. Acc: 0.957. Test loss: 0.062.Test acc: 0.951
Epoch [6/10]. Step [44/44]. Loss: 0.103. Acc: 0.961. Test loss: 0.005.Test acc: 0.945
Epoch [7/10]. Step [44/44]. Loss: 0.092. Acc: 0.965. Test loss: 0.003.Test acc: 0.947
Epoch [8/10]. Step [44/44]. Loss: 0.092. Acc: 0.969. Test loss: 0.014.Test acc: 0.948
Epoch [9/10]. Step [44/44]. Loss: 0.058. Acc: 0.974. Test loss: 0.317.Test acc: 0.944
Epoch [10/10]. Step [44/44]. Loss: 0.070. Acc: 0.979. Test loss: 0.007.Test acc: 0.943
Finished!
Hyper params: epochs - 10, learning_rate - 0.001, embedding_dim - 128, hidden_dim - 96, threshold_level - 0.3, drop_prob - 0.2.
Epoch [1/10]. Step [44/44]. Loss: 0.193. Acc: 0.748. Test loss: 0.489.Test acc: 0.935
Epoch [2/10]. Step [44/44]. Loss: 0.197. Acc: 0.940. Test loss: 0.016.Test acc: 0.941
Epoch [3/10]. Step [44/44]. Loss: 0.175. Acc: 0.947. Test loss: 0.038.Test acc: 0.943
Epoch [4/10]. Step [44/44]. Loss: 0.149. Acc: 0.953. Test loss: 0.042.Test acc: 0.945
E

Epoch [9/10]. Step [44/44]. Loss: 0.060. Acc: 0.986. Test loss: 0.003.Test acc: 0.947
Epoch [10/10]. Step [44/44]. Loss: 0.045. Acc: 0.990. Test loss: 0.252.Test acc: 0.949
Finished!
Hyper params: epochs - 10, learning_rate - 0.001, embedding_dim - 256, hidden_dim - 96, threshold_level - 0.5, drop_prob - 0.3.
Epoch [1/10]. Step [44/44]. Loss: 0.204. Acc: 0.933. Test loss: 0.150.Test acc: 0.942
Epoch [2/10]. Step [44/44]. Loss: 0.154. Acc: 0.949. Test loss: 0.017.Test acc: 0.948
Epoch [3/10]. Step [44/44]. Loss: 0.086. Acc: 0.958. Test loss: 0.536.Test acc: 0.951
Epoch [4/10]. Step [44/44]. Loss: 0.113. Acc: 0.963. Test loss: 0.015.Test acc: 0.953
Epoch [5/10]. Step [44/44]. Loss: 0.101. Acc: 0.968. Test loss: 0.002.Test acc: 0.951
Epoch [6/10]. Step [44/44]. Loss: 0.130. Acc: 0.973. Test loss: 0.005.Test acc: 0.949
Epoch [7/10]. Step [44/44]. Loss: 0.076. Acc: 0.979. Test loss: 0.006.Test acc: 0.949
Epoch [8/10]. Step [44/44]. Loss: 0.078. Acc: 0.984. Test loss: 0.002.Test acc: 0.947
E

По результатам обучения мы получили на 5 эпохах лучшие параметры accuracy 99% на трейне и 95,2% на тесте; на 10 эпохах немного хуже: на трейне - 98,9% и 95% - на тесте. Сами по себе чистые результаты оказались довольно близки и граница сильно размыта, что в свою очередь затрудняет тонкие настройки модели в чистом виде.

Инициализируем и обучим сеть LSTM на данных

In [26]:
class LSTMFixedLen(nn.Module) :
    def __init__(self, vocab_size, embedding_dim=128, hidden_dim=128, drop_prob=0.1, use_last=True):
        super().__init__()
        self.use_last = use_last
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=2, batch_first=True, dropout=drop_prob)
        self.linear = nn.Linear(hidden_dim, 1)
#         self.dropout = nn.Dropout(drop_prob)
        
    def forward(self, x):
        x = self.embeddings(x)
#         x = self.dropout(x)
        lstm_out, ht = self.lstm(x)
       
        if self.use_last:
            last_tensor = lstm_out[:,-1,:]
        else:
            # use mean
            last_tensor = torch.mean(lstm_out[:,:], dim=1)
    
        out = self.linear(last_tensor)
        return torch.sigmoid(out)

In [48]:
for epochs in n_epochs:
    for lr in learning_rates:
        for embedding_dim in e_dims:
            for hidden_dim in h_dims:
                for th in ths:
                    for dp in dps:
                        
                        print(f'Hyper params: epochs - {epochs}, learning_rate - {lr}, '
                             f'embedding_dim - {embedding_dim}, hidden_dim - {hidden_dim}, '
                             f'threshold_level - {th}, drop_prob - {dp}.')
                        model = LSTMFixedLen(vocab_size=max_words, 
                                             embedding_dim=embedding_dim, hidden_dim=hidden_dim, 
                                             drop_prob=dp, use_last=False)
                        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
                        model = model.to(device)
                        model.train()
                        th = th

                        train_loss_history = []
                        test_loss_history = []


                        for epoch in range(epochs):  
                            running_items, running_right = 0.0, 0.0
                            for i, data in enumerate(train_loader, 0):
                                inputs, labels = data[0].to(device), data[1].to(device)

                                # обнуляем градиент
                                optimizer.zero_grad()
                                outputs = model(inputs)

                                loss = criterion(outputs, labels.float().view(-1, 1))
                                loss.backward()
                                optimizer.step()

                                # подсчет ошибки на обучении
                                loss = loss.item()
                                running_items += len(labels)
                                # подсчет метрики на обучении
                                pred_labels = torch.squeeze((outputs > th).int())
                                running_right += (labels == pred_labels).sum()

                            # выводим статистику о процессе обучения
                            model.eval()

                            print(f'Epoch [{epoch + 1}/{epochs}]. ' \
                                    #f'Step [{i + 1}/{len(train_loader)}]. ' \
                                    f'Loss: {loss:.3f}. ' \
                                    f'Acc: {running_right / running_items:.3f}', end='. ')
                            running_loss, running_items, running_right = 0.0, 0.0, 0.0
                            train_loss_history.append(loss)

                                # выводим статистику на тестовых данных
                            test_running_right, test_running_total, test_loss = 0.0, 0.0, 0.0
                            for j, data in enumerate(val_loader):
                                test_labels = data[1].to(device)
                                test_outputs = model(data[0].to(device))

                                # подсчет ошибки на тесте
                                test_loss = criterion(test_outputs, test_labels.float().view(-1, 1))
                                # подсчет метрики на тесте
                                test_running_total += len(data[1])
                                pred_test_labels = torch.squeeze((test_outputs > th).int())
                                test_running_right += (test_labels == pred_test_labels).sum()

                            test_loss_history.append(test_loss.item())
                            print(f'Test loss: {test_loss:.3f}.' 
                                  f'Test acc: {test_running_right / test_running_total:.3f}')

                            model.train()

                        print('Finished!')

Hyper params: epochs - 5, learning_rate - 0.01, embedding_dim - 128, hidden_dim - 64, threshold_level - 0.3, drop_prob - 0.1.
Epoch [1/5]. Loss: 0.178. Acc: 0.875. Test loss: 0.027.Test acc: 0.926
Epoch [2/5]. Loss: 0.148. Acc: 0.948. Test loss: 0.606.Test acc: 0.946
Epoch [3/5]. Loss: 0.108. Acc: 0.957. Test loss: 0.027.Test acc: 0.946
Epoch [4/5]. Loss: 0.095. Acc: 0.965. Test loss: 0.122.Test acc: 0.945
Epoch [5/5]. Loss: 0.083. Acc: 0.969. Test loss: 1.261.Test acc: 0.940
Finished!
Hyper params: epochs - 5, learning_rate - 0.01, embedding_dim - 128, hidden_dim - 64, threshold_level - 0.3, drop_prob - 0.2.
Epoch [1/5]. Loss: 0.253. Acc: 0.870. Test loss: 0.112.Test acc: 0.928
Epoch [2/5]. Loss: 0.209. Acc: 0.942. Test loss: 0.011.Test acc: 0.946
Epoch [3/5]. Loss: 0.132. Acc: 0.953. Test loss: 0.083.Test acc: 0.949
Epoch [4/5]. Loss: 0.116. Acc: 0.960. Test loss: 0.003.Test acc: 0.947
Epoch [5/5]. Loss: 0.108. Acc: 0.965. Test loss: 0.002.Test acc: 0.928
Finished!
Hyper params: epoc

Epoch [4/5]. Loss: 0.081. Acc: 0.966. Test loss: 0.013.Test acc: 0.949
Epoch [5/5]. Loss: 0.087. Acc: 0.973. Test loss: 0.005.Test acc: 0.948
Finished!
Hyper params: epochs - 5, learning_rate - 0.01, embedding_dim - 256, hidden_dim - 64, threshold_level - 0.5, drop_prob - 0.3.
Epoch [1/5]. Loss: 0.150. Acc: 0.921. Test loss: 0.181.Test acc: 0.944
Epoch [2/5]. Loss: 0.196. Acc: 0.949. Test loss: 0.055.Test acc: 0.947
Epoch [3/5]. Loss: 0.184. Acc: 0.961. Test loss: 0.009.Test acc: 0.953
Epoch [4/5]. Loss: 0.125. Acc: 0.967. Test loss: 0.014.Test acc: 0.954
Epoch [5/5]. Loss: 0.113. Acc: 0.974. Test loss: 0.057.Test acc: 0.952
Finished!
Hyper params: epochs - 5, learning_rate - 0.01, embedding_dim - 256, hidden_dim - 96, threshold_level - 0.3, drop_prob - 0.1.
Epoch [1/5]. Loss: 0.120. Acc: 0.896. Test loss: 0.026.Test acc: 0.941
Epoch [2/5]. Loss: 0.102. Acc: 0.948. Test loss: 0.588.Test acc: 0.948
Epoch [3/5]. Loss: 0.137. Acc: 0.958. Test loss: 0.255.Test acc: 0.946
Epoch [4/5]. Loss:

Epoch [2/5]. Loss: 0.192. Acc: 0.940. Test loss: 0.029.Test acc: 0.944
Epoch [3/5]. Loss: 0.173. Acc: 0.950. Test loss: 0.016.Test acc: 0.947
Epoch [4/5]. Loss: 0.183. Acc: 0.957. Test loss: 0.072.Test acc: 0.947
Epoch [5/5]. Loss: 0.102. Acc: 0.961. Test loss: 0.005.Test acc: 0.950
Finished!
Hyper params: epochs - 5, learning_rate - 0.001, embedding_dim - 128, hidden_dim - 96, threshold_level - 0.5, drop_prob - 0.2.
Epoch [1/5]. Loss: 0.216. Acc: 0.930. Test loss: 0.131.Test acc: 0.934
Epoch [2/5]. Loss: 0.170. Acc: 0.942. Test loss: 0.117.Test acc: 0.943
Epoch [3/5]. Loss: 0.127. Acc: 0.952. Test loss: 0.080.Test acc: 0.948
Epoch [4/5]. Loss: 0.086. Acc: 0.958. Test loss: 0.718.Test acc: 0.950
Epoch [5/5]. Loss: 0.118. Acc: 0.962. Test loss: 0.030.Test acc: 0.950
Finished!
Hyper params: epochs - 5, learning_rate - 0.001, embedding_dim - 128, hidden_dim - 96, threshold_level - 0.5, drop_prob - 0.3.
Epoch [1/5]. Loss: 0.243. Acc: 0.930. Test loss: 0.065.Test acc: 0.932
Epoch [2/5]. Los

Epoch [7/10]. Loss: 0.056. Acc: 0.980. Test loss: 1.464.Test acc: 0.948
Epoch [8/10]. Loss: 0.047. Acc: 0.984. Test loss: 0.000.Test acc: 0.942
Epoch [9/10]. Loss: 0.029. Acc: 0.987. Test loss: 0.005.Test acc: 0.941
Epoch [10/10]. Loss: 0.022. Acc: 0.989. Test loss: 0.000.Test acc: 0.947
Finished!
Hyper params: epochs - 10, learning_rate - 0.01, embedding_dim - 128, hidden_dim - 96, threshold_level - 0.5, drop_prob - 0.3.
Epoch [1/10]. Loss: 0.196. Acc: 0.918. Test loss: 0.020.Test acc: 0.937
Epoch [2/10]. Loss: 0.177. Acc: 0.954. Test loss: 0.067.Test acc: 0.951
Epoch [3/10]. Loss: 0.119. Acc: 0.960. Test loss: 0.030.Test acc: 0.951
Epoch [4/10]. Loss: 0.105. Acc: 0.967. Test loss: 0.004.Test acc: 0.949
Epoch [5/10]. Loss: 0.056. Acc: 0.972. Test loss: 0.026.Test acc: 0.952
Epoch [6/10]. Loss: 0.046. Acc: 0.975. Test loss: 0.106.Test acc: 0.952
Epoch [7/10]. Loss: 0.056. Acc: 0.980. Test loss: 0.006.Test acc: 0.945
Epoch [8/10]. Loss: 0.034. Acc: 0.984. Test loss: 0.000.Test acc: 0.94

Epoch [2/10]. Loss: 0.137. Acc: 0.937. Test loss: 0.015.Test acc: 0.941
Epoch [3/10]. Loss: 0.157. Acc: 0.951. Test loss: 0.012.Test acc: 0.935
Epoch [4/10]. Loss: 0.108. Acc: 0.958. Test loss: 0.008.Test acc: 0.946
Epoch [5/10]. Loss: 0.072. Acc: 0.965. Test loss: 0.007.Test acc: 0.944
Epoch [6/10]. Loss: 0.072. Acc: 0.971. Test loss: 0.046.Test acc: 0.943
Epoch [7/10]. Loss: 0.080. Acc: 0.975. Test loss: 0.031.Test acc: 0.947
Epoch [8/10]. Loss: 0.034. Acc: 0.980. Test loss: 1.839.Test acc: 0.943
Epoch [9/10]. Loss: 0.045. Acc: 0.982. Test loss: 0.352.Test acc: 0.944
Epoch [10/10]. Loss: 0.022. Acc: 0.988. Test loss: 1.318.Test acc: 0.941
Finished!
Hyper params: epochs - 10, learning_rate - 0.01, embedding_dim - 256, hidden_dim - 96, threshold_level - 0.5, drop_prob - 0.1.
Epoch [1/10]. Loss: 0.129. Acc: 0.941. Test loss: 0.046.Test acc: 0.949
Epoch [2/10]. Loss: 0.095. Acc: 0.956. Test loss: 0.005.Test acc: 0.950
Epoch [3/10]. Loss: 0.125. Acc: 0.963. Test loss: 0.087.Test acc: 0.95

Epoch [2/10]. Loss: 0.152. Acc: 0.939. Test loss: 0.037.Test acc: 0.942
Epoch [3/10]. Loss: 0.192. Acc: 0.952. Test loss: 0.019.Test acc: 0.948
Epoch [4/10]. Loss: 0.107. Acc: 0.960. Test loss: 0.070.Test acc: 0.951
Epoch [5/10]. Loss: 0.101. Acc: 0.966. Test loss: 0.209.Test acc: 0.952
Epoch [6/10]. Loss: 0.111. Acc: 0.971. Test loss: 0.039.Test acc: 0.949
Epoch [7/10]. Loss: 0.066. Acc: 0.976. Test loss: 0.059.Test acc: 0.948
Epoch [8/10]. Loss: 0.064. Acc: 0.981. Test loss: 0.012.Test acc: 0.950
Epoch [9/10]. Loss: 0.073. Acc: 0.983. Test loss: 0.094.Test acc: 0.950
Epoch [10/10]. Loss: 0.077. Acc: 0.987. Test loss: 0.010.Test acc: 0.948
Finished!
Hyper params: epochs - 10, learning_rate - 0.001, embedding_dim - 256, hidden_dim - 64, threshold_level - 0.5, drop_prob - 0.2.
Epoch [1/10]. Loss: 0.156. Acc: 0.930. Test loss: 0.050.Test acc: 0.933
Epoch [2/10]. Loss: 0.157. Acc: 0.940. Test loss: 0.112.Test acc: 0.945
Epoch [3/10]. Loss: 0.121. Acc: 0.952. Test loss: 0.270.Test acc: 0.9

Процесс обучения показал так же весьма близкие результаты: на 5 эпохах точность 96,7%/95,3% соответственно на трейн и тест; на 10 эпохах 98,6%/95%. Для получения более усредненный стабильных результатов обучим итоговоую модель с настройками, взятыми из предыдущих моделей, показавшими на обучении лучшие результаты. Для ускорения процесса я возьму 5 эпох, вместо 10-ти, исхлдя из соображений удешевления процесса (полученные показатели были очень близки).

In [27]:
model = LSTMFixedLen(vocab_size=max_words, 
                 embedding_dim=256, hidden_dim=96, 
                 drop_prob=0.1, use_last=False)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [28]:
model = model.to(device)
model.train()
th = 0.5
epochs = 5

train_loss_history = []
test_loss_history = []


for epoch in range(epochs):  
    running_items, running_right = 0.0, 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        # обнуляем градиент
        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, labels.float().view(-1, 1))
        loss.backward()
        optimizer.step()

        # подсчет ошибки на обучении
        loss = loss.item()
        running_items += len(labels)
        # подсчет метрики на обучении
        pred_labels = torch.squeeze((outputs > th).int())
        running_right += (labels == pred_labels).sum()

    # выводим статистику о процессе обучения
    model.eval()

    print(f'Epoch [{epoch + 1}/{epochs}]. ' \
            f'Step [{i + 1}/{len(train_loader)}]. ' \
            f'Loss: {loss:.3f}. ' \
            f'Acc: {running_right / running_items:.3f}', end='. ')
    running_loss, running_items, running_right = 0.0, 0.0, 0.0
    train_loss_history.append(loss)

        # выводим статистику на тестовых данных
    test_running_right, test_running_total, test_loss = 0.0, 0.0, 0.0
    for j, data in enumerate(val_loader):
        test_labels = data[1].to(device)
        test_outputs = model(data[0].to(device))

        # подсчет ошибки на тесте
        test_loss = criterion(test_outputs, test_labels.float().view(-1, 1))
        # подсчет метрики на тесте
        test_running_total += len(data[1])
        pred_test_labels = torch.squeeze((test_outputs > th).int())
        test_running_right += (test_labels == pred_test_labels).sum()

    test_loss_history.append(test_loss.item())
    print(f'Test loss: {test_loss:.3f}.' 
          f'Test acc: {test_running_right / test_running_total:.3f}')

    model.train()

print('Finished!')

Epoch [1/5]. Step [44/44]. Loss: 0.239. Acc: 0.931. Test loss: 0.630.Test acc: 0.935
Epoch [2/5]. Step [44/44]. Loss: 0.145. Acc: 0.946. Test loss: 0.022.Test acc: 0.945
Epoch [3/5]. Step [44/44]. Loss: 0.146. Acc: 0.956. Test loss: 0.022.Test acc: 0.951
Epoch [4/5]. Step [44/44]. Loss: 0.105. Acc: 0.962. Test loss: 1.380.Test acc: 0.949
Epoch [5/5]. Step [44/44]. Loss: 0.068. Acc: 0.969. Test loss: 0.036.Test acc: 0.952
Finished!


In [37]:
#import matplotlib as plt
#%matplotlib inline