# Imports

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import OrdinalEncoder
import os
import random
import torch
from torchmetrics import Accuracy
from torch.utils.tensorboard import SummaryWriter

In [2]:
from gensim.models import Word2Vec, FastText
import gensim.downloader as api
from sklearn.metrics import classification_report
import bokeh.models as bm, bokeh.plotting as pl


In [3]:
from dotenv import load_dotenv
import os
load_dotenv()
SEED=int(os.getenv('SEED'))

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def seed_everything(seed):
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

In [5]:
import sys
sys.path.append('..')

In [6]:
from src.data.nn_utils import *
from src.models.eval_nn_utils import evaluate_model, get_np_targets
from src.visualization.visualize import draw_vectors, get_tsne_projection
from src.models.train_predict import *

# Data Processing

In [None]:
df_filtered_nn = pd.read_csv('../data/processed/data_processed_no_lemming')

In [None]:
df_filtered_nn = df_filtered_nn.reset_index()

In [None]:
df_filtered_nn['Plot']=df_filtered_nn['Plot'].apply(lambda x: ' '.join(x))

Creating custom dataset and splitting the data.

In [None]:
dataset = MoviePLotsDataset(df_filtered_nn.Plot, df_filtered_nn.Genre)

In [None]:
generator = torch.Generator().manual_seed(SEED)
train_subset, val_subset, test_subset = random_split(dataset, [0.75, 0.125, 0.125], generator=generator)

## Creating vocabulary

In [None]:
tokenizer = get_tokenizer('basic_english')

vocab = get_vocab(train_subset, tokenizer)

# Bucket dataloader utils

In [None]:
encoder = OrdinalEncoder()
encoder.fit(df_filtered_nn['Genre'].to_numpy().reshape(-1, 1));

In [None]:

text_transform = lambda text: vocab(tokenizer(text))
label_transform = lambda label: encoder.transform(np.array(label).reshape(-1, 1))[0][0]

def collate_batch(batch):
    label_list, text_list = [], []
    for (_label, _text) in batch:
        #print(_label)
        label_list.append(label_transform(_label))
        processed_text = torch.tensor(text_transform(_text))
        text_list.append(processed_text)
    return pad_sequence(text_list, padding_value=1.0), torch.tensor(label_list, dtype=torch.long)

In [None]:
bucket_dataloader = DataLoader(train_subset,
                               batch_sampler=BatchSamplerSimilarLength(
                                     dataset=train_subset,
                                     batch_size=16),
                               collate_fn=collate_batch)

In [None]:
texts = [tokenizer(text) for text in df_filtered_nn.Plot]
word2vec_emb = Word2Vec(texts,
                 vector_size=32,      # embedding vector size
                 min_count=5,  # consider words that occured at least 4 times
                 window=5,
                 workers=2).wv

In [None]:
word2vec_emb['happy']

array([-0.00574573,  0.6876102 , -1.4397521 , -0.7084056 , -1.1946324 ,
        0.34313405,  1.9234267 , -1.2126812 , -0.9450241 ,  2.4132478 ,
       -0.63826317,  1.5583279 , -1.6262739 ,  0.345427  , -1.8287905 ,
       -0.20957346, -0.83093745,  1.2358985 , -5.191705  ,  2.2568421 ,
        2.256946  , -0.00996144,  0.97180945,  1.853633  ,  1.9129637 ,
       -2.4262538 , -0.38523382, -1.98292   ,  0.2716784 , -3.7975988 ,
       -2.5451398 , -0.6977667 ], dtype=float32)

In [None]:
words = word2vec_emb.index_to_key[:1000]
word_vectors = word2vec_emb.vectors[[word2vec_emb.key_to_index[word] for word in words]]
word_tsne = get_tsne_projection(word_vectors)
draw_vectors(word_tsne[:, 0], word_tsne[:, 1], color='green', token=words)

In [None]:
from torch import nn

# Main functions

In [None]:
RUNS_PATH = r'../reports/tensorboards/'

In [None]:
loss_fn = nn.CrossEntropyLoss()
loss_fn.to(device)

CrossEntropyLoss()

In [None]:
accuracy = Accuracy(task="multiclass", num_classes=len(encoder.categories_[0]))
accuracy.to(device)

MulticlassAccuracy()

In [None]:
len(encoder.categories_[0])

139

# Experiments with models

## Word2Vec embedding

In [None]:
word2vec_lstm_clf = LSTM2d(word2vec_emb, vocab).to(device)

In [None]:
best_weights = train(word2vec_lstm_clf, SummaryWriter(RUNS_PATH + 'runs/word2vec_lstm_clf'), train_subset, val_subset,collate_batch, patience=5)

Epoch 0: : 610batch [00:26, 22.64batch/s, cross_entropy=2.79]
Epoch 1: : 610batch [00:29, 20.57batch/s, cross_entropy=2.56]
Epoch 2: : 610batch [00:25, 24.35batch/s, cross_entropy=2.81]
Epoch 3: : 610batch [00:24, 24.68batch/s, cross_entropy=2.53]
Epoch 4: : 610batch [00:25, 23.92batch/s, cross_entropy=3.27]
Epoch 5: : 610batch [00:26, 22.76batch/s, cross_entropy=2.57]
Epoch 6: : 610batch [00:23, 25.84batch/s, cross_entropy=2.55]
Epoch 7: : 610batch [00:24, 24.52batch/s, cross_entropy=2.84]
Epoch 8: : 610batch [00:25, 23.90batch/s, cross_entropy=3.1]
Epoch 9: : 610batch [00:24, 24.75batch/s, cross_entropy=3.47]
Epoch 10: : 610batch [00:26, 23.07batch/s, cross_entropy=2.76]
Epoch 11: : 610batch [00:25, 24.18batch/s, cross_entropy=2.98]
Epoch 12: : 610batch [00:29, 20.41batch/s, cross_entropy=2.23]
Epoch 13: : 610batch [00:40, 15.21batch/s, cross_entropy=1.66]
Epoch 14: : 610batch [00:27, 21.94batch/s, cross_entropy=2.8]


In [None]:
word2vec_lstm_clf.load_state_dict(best_weights)

<All keys matched successfully>

In [None]:
evaluate_model(word2vec_lstm_clf, train_subset, collate_batch);

Accuracy score = 0.422


In [None]:
evaluate_model(word2vec_lstm_clf, test_subset, collate_batch);

Accuracy score = 0.362


In [None]:
# %tensorboard --logdir=../reports/tensorboards/runs/word2vec_lstm_clf --port=6007

Achieved result is not better then logistic regression which test score is 0.38, but it is second to that - better then naive bayes and random forest.

To-do:
1) Analize the results - transform predictions to numpy and create class report
2) Try bigger word context - 64 instead of 32
3) Try different embeddings - glove, fasttext
4) Try pretrained - Bert, Gpt.

Let's try glove embedding

In [None]:
y_pred_test, y_test = get_np_targets(word2vec_lstm_clf, test_subset, collate_batch)

In [None]:
present_genres = [encoder.categories_[0][i] for i in np.unique(y_pred_test)]

In [None]:
print(classification_report(y_test, y_pred_test, labels = np.unique(y_pred_test), target_names=present_genres))

              precision    recall  f1-score   support

      action       0.31      0.24      0.27       174
   adventure       0.23      0.39      0.29        71
   animation       0.56      0.42      0.48        55
      comedy       0.37      0.52      0.43       543
       crime       0.17      0.39      0.24        74
 crime drama       0.07      0.04      0.05        50
       drama       0.36      0.61      0.46       773
      horror       0.49      0.43      0.46       138
     musical       0.31      0.07      0.11        57
    romantic       0.24      0.38      0.29       108
      sci_fi       0.30      0.57      0.39        69
    thriller       0.31      0.06      0.09       143
         war       0.22      0.47      0.30        30
     western       0.50      0.70      0.58       119

   micro avg       0.35      0.47      0.40      2404
   macro avg       0.32      0.38      0.32      2404
weighted avg       0.35      0.47      0.38      2404



Those are the only labels that were present in predictions and from those I see that again the best predicted labels are the most distinct western and horror and also the most presented - drama and comedy. Compering to logistic regression results, all the best predicted labels are being predicted even better by log_reg, although some infrequent like animation - better by lstm. It's woth noting that values of micro, macro and weighted avg are not comparable to those in log reg as they are calculated only on some part of classes that were predicted by the model.

Perhaps 32 is not enough for embedding size and so i'll try 64.

In [None]:
word2vec_emb_64 = Word2Vec(texts,
                 vector_size=64,      # embedding vector size
                 min_count=5,  # consider words that occured at least 4 times
                 window=5,
                 workers=2).wv

In [None]:
word2vec_lstm_clf_64 = LSTM2d(word2vec_emb_64, vocab).to(device)

In [None]:
best_weights = train(word2vec_lstm_clf_64, SummaryWriter(RUNS_PATH + 'runs/word2vec_lstm_clf_64'), train_subset, val_subset, collate_batch, patience=5)

Epoch 0: : 610batch [00:34, 17.75batch/s, cross_entropy=3.65]
Epoch 1: : 610batch [00:29, 20.85batch/s, cross_entropy=2.37]
Epoch 2: : 610batch [00:27, 22.38batch/s, cross_entropy=2.46]
Epoch 3: : 610batch [00:25, 23.74batch/s, cross_entropy=3.78]
Epoch 4: : 610batch [00:25, 24.02batch/s, cross_entropy=3.1]
Epoch 5: : 610batch [00:25, 23.53batch/s, cross_entropy=2.3]
Epoch 6: : 610batch [00:27, 22.31batch/s, cross_entropy=2.62]
Epoch 7: : 610batch [00:26, 23.39batch/s, cross_entropy=3.08]


In [None]:
word2vec_lstm_clf_64.load_state_dict(best_weights)

<All keys matched successfully>

In [None]:
evaluate_model(word2vec_lstm_clf_64, train_subset, collate_batch);

Accuracy score = 0.361


In [None]:
evaluate_model(word2vec_lstm_clf_64, test_subset, collate_batch);

Accuracy score = 0.341


In [None]:
# %tensorboard --logdir=../reports/tensorboards/runs/word2vec_lstm_clf_64 --port=6008

Bigger context achieved much worse results so I'll stick to the 32 embedding size and try to provide some reguralization to the model.

In [None]:
word2vec_lstm_clf_reg = LSTM2d(word2vec_emb, vocab, p_drop_out=0.2, weight_decay=1e-6).to(device)

In [None]:
best_weights = train(word2vec_lstm_clf_reg, SummaryWriter(RUNS_PATH + 'runs/word2vec_lstm_clf_reg'), train_subset, val_subset, collate_batch, patience=5)

Epoch 0: : 610batch [00:25, 23.88batch/s, cross_entropy=3.23]
Epoch 1: : 610batch [00:25, 23.97batch/s, cross_entropy=2.9]
Epoch 2: : 610batch [00:26, 23.06batch/s, cross_entropy=2.49]
Epoch 3: : 610batch [00:28, 21.47batch/s, cross_entropy=2.75]
Epoch 4: : 610batch [00:26, 22.72batch/s, cross_entropy=2.78]
Epoch 5: : 610batch [00:24, 24.47batch/s, cross_entropy=2.23]
Epoch 6: : 610batch [00:24, 24.63batch/s, cross_entropy=2.54]
Epoch 7: : 610batch [00:26, 23.23batch/s, cross_entropy=2.17]
Epoch 8: : 610batch [00:26, 23.31batch/s, cross_entropy=2.21]
Epoch 9: : 610batch [00:24, 24.53batch/s, cross_entropy=2.44]
Epoch 10: : 610batch [00:24, 24.71batch/s, cross_entropy=2.96]
Epoch 11: : 610batch [00:25, 23.87batch/s, cross_entropy=2.07]
Epoch 12: : 610batch [00:28, 21.35batch/s, cross_entropy=3.04]


In [None]:
word2vec_lstm_clf_reg.load_state_dict(best_weights)

<All keys matched successfully>

In [None]:
evaluate_model(word2vec_lstm_clf_reg, train_subset, collate_batch);

Accuracy score = 0.403


In [None]:
evaluate_model(word2vec_lstm_clf_reg, test_subset, collate_batch);

Accuracy score = 0.367


In [None]:
# %tensorboard --logdir=../reports/tensorboards/runs/word2vec_lstm_clf_reg --port=6009

Reguralization appears to be to strong so I'll try to tune it

In [None]:
def tune_drop_out(model_class, vocab, values, embedding, folder_name: str):
    test_acc = []
    for p in values:
        print(f'Drop_out value = {p} :')
        model = model_class(embedding, vocab, p_drop_out=p, weight_decay=1e-6).to(device)
        best_weights = train(model, SummaryWriter(RUNS_PATH + 'runs/' + folder_name + '/' + str(p)),collate_batch, patience=5, verbose=False)
        print('Train', end=' ')
        evaluate_model(model, train_subset);
        print('Test', end=' ')
        acc = evaluate_model(model, test_subset);
        test_acc.append(acc)
        print('')
    return {'p': values[test_acc.index(max(test_acc))]}

In [None]:
tune_drop_out(LSTM2d, vocab, (0.05, 0.1, 0.15), word2vec_emb, 'dropout_tune')

Drop_out value = 0.05 :
Train Accuracy score = 0.396
Test Accuracy score = 0.359

Drop_out value = 0.1 :
Train Accuracy score = 0.375
Test Accuracy score = 0.338

Drop_out value = 0.15 :
Train Accuracy score = 0.407
Test Accuracy score = 0.373



{'p': 0.15}

In [None]:
# %tensorboard --logdir=../reports/tensorboards/runs/dropout_tune --port=6015

Dropout value seems to to be having an unclear effect on the results so I'll try to teach the best looking model with p=0.15 one more time. Either wqay around, 2 directional 2 layerded lstm with trained embeddings is not able to beat baseline logistic regression.

In [None]:
word2vec_lstm_clf_reg_tuned = LSTM2d(word2vec_emb, vocab, p_drop_out=0.15, weight_decay=1e-6).to(device)

In [None]:
best_weights = train(word2vec_lstm_clf_reg_tuned, SummaryWriter(RUNS_PATH + 'runs/word2vec_lstm_clf_reg_tuned'), train_subset, val_subset,collate_batch, patience=5)

Epoch 0: : 610batch [00:26, 22.79batch/s, cross_entropy=3.09]
Epoch 1: : 610batch [00:26, 22.88batch/s, cross_entropy=2.74]
Epoch 2: : 610batch [00:26, 22.93batch/s, cross_entropy=2.6]
Epoch 3: : 610batch [00:25, 23.92batch/s, cross_entropy=2.81]
Epoch 4: : 610batch [00:26, 23.25batch/s, cross_entropy=3.08]
Epoch 5: : 610batch [00:26, 23.06batch/s, cross_entropy=3.08]


In [None]:
word2vec_lstm_clf_reg_tuned.load_state_dict(best_weights)

<All keys matched successfully>

In [None]:
print('Train', end=' ')
evaluate_model(word2vec_lstm_clf_reg_tuned, train_subset, collate_batch);

Train Accuracy score = 0.314


In [None]:
print('Test', end=' ')
evaluate_model(word2vec_lstm_clf_reg_tuned, test_subset, collate_batch);

Test Accuracy score = 0.308


It seems that previous good result was random and so the best model is the default one without reguralization.

## Fasttext embedding

In [None]:
fasttext_emb_64 = FastText(texts,
                 vector_size=32,      # embedding vector size
                 min_count=5,  # consider words that occured at least 5 times
                 window=5,
                 workers=2).wv

In [None]:
fasttext_lstm_reg = LSTM2d(fasttext_emb_64, vocab, p_drop_out=0.05, weight_decay=1e-6).to(device)

In [None]:
best_weights = train(fasttext_lstm_reg, SummaryWriter(RUNS_PATH + 'runs/fasttext_lstm_reg'), train_subset, val_subset,collate_batch, patience=5)

Epoch 0: : 610batch [00:26, 22.91batch/s, cross_entropy=2.79]
Epoch 1: : 610batch [00:26, 22.64batch/s, cross_entropy=2.99]
Epoch 2: : 610batch [00:26, 23.18batch/s, cross_entropy=2.46]
Epoch 3: : 610batch [00:25, 23.54batch/s, cross_entropy=3.22]
Epoch 4: : 610batch [00:25, 23.69batch/s, cross_entropy=2.86]
Epoch 5: : 610batch [00:25, 23.79batch/s, cross_entropy=2.69]
Epoch 6: : 610batch [00:26, 22.77batch/s, cross_entropy=2.22]
Epoch 7: : 610batch [00:28, 21.14batch/s, cross_entropy=2.49]


In [None]:
fasttext_lstm_reg.load_state_dict(best_weights)

<All keys matched successfully>

In [None]:
evaluate_model(fasttext_lstm_reg, train_subset, collate_batch);

Accuracy score = 0.353


In [None]:
evaluate_model(fasttext_lstm_reg, test_subset, collate_batch);

Accuracy score = 0.333


In [None]:
# %tensorboard --logdir=../reports/tensorboards/runs/fasttext_lstm_reg --port=6013

In [None]:
tune_drop_out(LSTM2d, vocab, (0, 0.1, 0.15, 0.2), fasttext_emb_64, 'dropout_tune_fasttext')

Drop_out value = 0 :
Train Accuracy score = 0.432
Test Accuracy score = 0.369

Drop_out value = 0.1 :
Train Accuracy score = 0.53
Test Accuracy score = 0.357

Drop_out value = 0.15 :
Train Accuracy score = 0.437
Test Accuracy score = 0.352

Drop_out value = 0.2 :
Train Accuracy score = 0.562
Test Accuracy score = 0.358



{'p': 0}

In [None]:
fasttext_lstm_tuned = LSTM2d(fasttext_emb_64, vocab, p_drop_out=0, weight_decay=1e-6).to(device)

In [None]:
best_weights = train(fasttext_lstm_tuned, SummaryWriter(RUNS_PATH + 'runs/fasttext_lstm_tuned'), train_subset, val_subset,collate_batch, patience=5)

Epoch 0: : 610batch [00:26, 22.93batch/s, cross_entropy=2.99]
Epoch 1: : 610batch [00:26, 22.68batch/s, cross_entropy=3.08]
Epoch 2: : 610batch [00:26, 23.30batch/s, cross_entropy=2.52]
Epoch 3: : 610batch [00:24, 24.81batch/s, cross_entropy=2.96]
Epoch 4: : 610batch [00:26, 23.39batch/s, cross_entropy=2.63]
Epoch 5: : 610batch [00:28, 21.58batch/s, cross_entropy=3.02]
Epoch 6: : 610batch [00:25, 23.65batch/s, cross_entropy=2.06]
Epoch 7: : 610batch [00:25, 24.26batch/s, cross_entropy=2.63]
Epoch 8: : 610batch [00:25, 23.96batch/s, cross_entropy=2.65]
Epoch 9: : 610batch [00:26, 23.30batch/s, cross_entropy=1.74]
Epoch 10: : 610batch [00:26, 22.68batch/s, cross_entropy=3.36]
Epoch 11: : 610batch [00:25, 23.94batch/s, cross_entropy=2.75]
Epoch 12: : 610batch [00:24, 25.03batch/s, cross_entropy=3.03]
Epoch 13: : 610batch [00:27, 22.06batch/s, cross_entropy=3.76]


In [None]:
fasttext_lstm_tuned.load_state_dict(best_weights)

<All keys matched successfully>

In [None]:
evaluate_model(fasttext_lstm_tuned, train_subset, collate_batch);

Accuracy score = 0.427


In [None]:
evaluate_model(fasttext_lstm_tuned, test_subset, collate_batch);

Accuracy score = 0.367


Model with Fasttext embedding performs very simmilarly to word2vec models with trained and pretrained embeddings but considering that it's results very proven one time(one - when I was finding best dropout value and 2nd - when i trained a new model with it) and because it can handle unknown words I will save this model.

In [None]:
model_scripted = torch.jit.script(fasttext_lstm_tuned) # Export to TorchScript
model_scripted.save(r'..\models\multiclass_models\NN models\fasttext_lstm_scripted.pt') # Save

In [None]:
# import shutil

# # Set the source and destination paths
# file_source = '/content/runs'
# file_destination = '/content/drive/MyDrive/movie plot tensorboard/runs'

# # Copy the file from Colab to Google Drive
# shutil.move(file_source, file_destination)

'/content/drive/MyDrive/movie plot tensorboard/runs/runs'

## Pretrained embeddings

Now, that I'm going to use pretrained models, I won't have problems with unknown words and so I'll make vocabulary bigger so that I used the advantages of pretrained models

In [None]:
vocab = get_vocab(train_subset, tokenizer, min_freq=1, )

In [None]:
word2vec_pretrained = api.load('word2vec-google-news-300')



In [None]:
word2vec_pretrained_lstm_reg = LSTM2d(word2vec_pretrained, vocab, p_drop_out=0.05, weight_decay=1e-6).to(device)

In [None]:
best_weights = train(word2vec_pretrained_lstm_reg, SummaryWriter(RUNS_PATH + 'runs/word2vec_pretrained_lstm_reg'), train_subset, val_subset,collate_batch, patience=5)

Epoch 0: : 610batch [00:26, 23.08batch/s, cross_entropy=3.05]
Epoch 1: : 610batch [00:25, 24.02batch/s, cross_entropy=2.69]
Epoch 2: : 610batch [00:25, 24.37batch/s, cross_entropy=2.88]
Epoch 3: : 610batch [00:23, 25.55batch/s, cross_entropy=2.55]
Epoch 4: : 610batch [00:25, 23.98batch/s, cross_entropy=3.2]
Epoch 5: : 610batch [00:25, 23.59batch/s, cross_entropy=2.35]
Epoch 6: : 610batch [00:24, 24.54batch/s, cross_entropy=2.64]
Epoch 7: : 610batch [00:24, 25.31batch/s, cross_entropy=2.67]
Epoch 8: : 610batch [00:25, 23.47batch/s, cross_entropy=2.84]
Epoch 9: : 610batch [00:25, 23.77batch/s, cross_entropy=3.6]
Epoch 10: : 610batch [00:25, 24.26batch/s, cross_entropy=2.17]


In [None]:
word2vec_pretrained_lstm_reg.load_state_dict(best_weights)

<All keys matched successfully>

In [None]:
evaluate_model(word2vec_pretrained_lstm_reg, train_subset, collate_batch);

Accuracy score = 0.425


In [None]:
evaluate_model(word2vec_pretrained_lstm_reg, test_subset, collate_batch);

Accuracy score = 0.368


In [None]:
# %tensorboard --logdir=../reports/tensorboards/runs/word2vec_pretrained_lstm_reg --port=6012

Usage of pretrained word2vec embedding didn't improve the result, even worsened it. The reason is I guess the embedding was not a bottleneck and so making it better didn't really change much. Also the task is rather specific and so good embedding needs a fine tuning and few epochs of training was not sufficient for that task(I didn't put requeres_grad to False).

In [None]:
glove_pretrained = api.load('glove-twitter-100')



In [None]:
glove_pretrained_lstm_reg = LSTM2d(glove_pretrained, vocab, p_drop_out=0.05, weight_decay=1e-6).to(device)

In [None]:
best_weights = train(glove_pretrained_lstm_reg, SummaryWriter(RUNS_PATH + 'runs/glove_pretrained_lstm_reg'), train_subset, val_subset,collate_batch, patience=5)

Epoch 0: : 610batch [00:24, 24.54batch/s, cross_entropy=2.81]
Epoch 1: : 610batch [00:24, 24.54batch/s, cross_entropy=2.77]
Epoch 2: : 610batch [00:23, 26.14batch/s, cross_entropy=2.3]
Epoch 3: : 610batch [00:24, 24.50batch/s, cross_entropy=2.64]
Epoch 4: : 610batch [00:25, 24.10batch/s, cross_entropy=2.26]
Epoch 5: : 610batch [00:23, 25.81batch/s, cross_entropy=2.62]
Epoch 6: : 610batch [00:24, 24.72batch/s, cross_entropy=2.62]
Epoch 7: : 610batch [00:24, 24.51batch/s, cross_entropy=2.77]
Epoch 8: : 610batch [00:23, 26.18batch/s, cross_entropy=2.02]
Epoch 9: : 610batch [00:24, 24.65batch/s, cross_entropy=2.05]
Epoch 10: : 610batch [00:24, 24.55batch/s, cross_entropy=2.98]
Epoch 11: : 610batch [00:23, 25.85batch/s, cross_entropy=3.08]


In [None]:
glove_pretrained_lstm_reg.load_state_dict(best_weights)

<All keys matched successfully>

In [None]:
evaluate_model(glove_pretrained_lstm_reg, train_subset, collate_batch);

Accuracy score = 0.402


In [None]:
evaluate_model(glove_pretrained_lstm_reg, test_subset, collate_batch);

Accuracy score = 0.343


In [None]:
# %tensorboard --logdir=../reports/tensorboards/runs/glove_pretrained_lstm_reg --port=6013

The test score is a bit better then in case of pretrained word2vec but is still much worse then the trained one.

# Conclusions

All in all, this notebook appears to be more of a utils exrcise as lstm models couldn't surpass logistic regression no matter what I've tried. The best exempes are those with trained fasttext embeddings and pretrained word2vec embeddings but even those only got around 0.36 accuracy which is 2 and 4 % worse then untuned and tuned log reg respectively.