# model 3

In [1]:
from abc import ABC
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm, trange
from preprocessing import SentencesEmbeddingDataset
from model2_nn import train_and_plot
from torch.nn.utils import rnn
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.optim import Adam
from train_loop_model3 import train_and_plot_LSTM
from utils import remove_padding
from model3_comp import LSTM_NER_NN
from utils import print_epoch_details, remove_padding
from torch.nn.functional import one_hot

In [2]:
# Build the sentence-embedding dataset from two pretrained embedding sources.
# NOTE(review): vec_dim=500 presumably equals 200 (GloVe Twitter) + 300
# (word2vec Google News) concatenated per token -- confirm in preprocessing.py.
ds = SentencesEmbeddingDataset(
    list_embedding_paths=["glove-twitter-200", "word2vec-google-news-300"],
    vec_dim=500,
)

preparing embedding...


In [None]:
# Mini-batch size; it is also embedded in the checkpoint file name built later.
batch_size = 32

# The dataset hands back the train and dev loaders as a pair.
(train_loader, dev_loader) = ds.get_data_loaders(batch_size=batch_size)

In [21]:
# ---- Hyperparameters ----
num_classes = 2
num_epochs = 30
hidden_dim = 32
embedding_dim = ds.vec_dim
lr = 0.001
# Other candidates: nn.ReLU(), nn.Sigmoid()
activation = nn.Tanh()
num_layers = 1
# The checkpoint name records the settings that produced it.
model_save_path = (
    f"LSTM_model_stateDict_batchSize_{batch_size}_hidden_{hidden_dim}_lr_{lr}.pt"
)

# ---- Model and optimizer ----
LSTM_model = LSTM_NER_NN(
    embedding_dim=embedding_dim,
    num_classes=num_classes,
    hidden_dim=hidden_dim,
    model_save_path=model_save_path,
    activation=activation,
    num_layers=num_layers,
)

optimizer = Adam(params=LSTM_model.parameters(), lr=lr)

# ---- Class weights for the loss ----
# Inverse-frequency weights derived from the training label counts: each class
# is weighted by the share of the *other* class. Dividing by the total count
# (rather than by the label-0 count alone) keeps both weights inside [0, 1]
# whichever class is the majority.
num_train_labels = ds.train_num_label_0 + ds.train_num_label_1
weight_0 = ds.train_num_label_1 / num_train_labels
weight_1 = 1 - weight_0
frequency_labels_weights = [weight_0, weight_1]  # kept for reference only

# Weights actually passed to the loss. These hand-tuned values deliberately
# take precedence over the frequency-derived pair above.
labels_weights = [0.1, 0.9]
loss_func = nn.CrossEntropyLoss(weight=torch.tensor(labels_weights))


hidden_dim=32 | activation=Tanh() | num_layers=1


In [22]:
# Validation is switched off for this run; assign a DataLoader here to also
# evaluate on held-out data after every training pass.
val_loader = None

# -------
# GPU
# -------
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    print("Training on GPU.")
else:
    print("No GPU available, training on CPU.")
LSTM_model.to(device)
# CrossEntropyLoss stores its class weights as a module buffer; it has to live
# on the same device as the model outputs or the loss call fails on GPU.
loss_func.to(device)

# Loop-invariant setup: number of classes and the loaders visited each epoch.
num_classes = LSTM_model.num_classes
data_loaders = {"train": train_loader}
if val_loader:
    data_loaders["validate"] = val_loader

# ----------------------------------
# Epoch Loop
# ----------------------------------
for epoch in range(num_epochs):
    # Fresh per-epoch accumulators (rows = true label, columns = predicted label).
    train_confusion_matrix = np.zeros([num_classes, num_classes])
    val_confusion_matrix = None
    if val_loader:
        val_confusion_matrix = np.zeros([num_classes, num_classes])
    train_loss_batches_list = []

    for loader_type, data_loader in data_loaders.items():
        is_training = loader_type == "train"
        # Train mode for the training pass, eval mode for validation.
        LSTM_model.train(is_training)
        confusion_matrix = (
            train_confusion_matrix if is_training else val_confusion_matrix
        )

        for sentences, labels, sen_lengths in data_loader:
            # pack_padded_sequence requires the lengths tensor on the CPU, so
            # keep a CPU copy before moving the batch to the target device.
            lengths_cpu = sen_lengths.cpu()
            sentences, labels, sen_lengths = (
                sentences.to(device),
                labels.to(device),
                sen_lengths.to(device),
            )

            # Autograd is only needed while training.
            with torch.set_grad_enabled(is_training):
                # forward
                outputs = LSTM_model(sentences, sen_lengths)

                # labels: pack/unpack and strip the padding before one-hot
                # encoding them for the loss.
                # NOTE(review): assumes remove_padding yields one label per row
                # of `outputs` -- confirm against utils.remove_padding.
                packed_labels = pack_padded_sequence(
                    labels, lengths_cpu, batch_first=True, enforce_sorted=False
                )
                unpacked_labels, labels_lengths = pad_packed_sequence(
                    packed_labels, batch_first=True
                )
                unpadded_labels = remove_padding(
                    unpacked_labels, labels_lengths
                ).long()
                labels_one_hot = one_hot(unpadded_labels, num_classes=num_classes)

                # loss
                loss = loss_func(outputs, labels_one_hot.float())

            if is_training:
                train_loss_batches_list.append(loss.detach().cpu())
                # backprop
                # NOTE(review): retain_graph=True is kept from the original; it
                # is only required if the model reuses graph state across
                # batches -- confirm and drop it if not.
                loss.backward(retain_graph=True)
                optimizer.step()
                optimizer.zero_grad()

            # predictions: accumulate the whole batch into the confusion matrix
            # in one vectorised update (np.add.at handles repeated index pairs).
            y_true = unpadded_labels.detach().cpu().reshape(-1).numpy()
            y_pred = outputs.detach().argmax(dim=1).cpu().reshape(-1).numpy()
            np.add.at(confusion_matrix, (y_true, y_pred), 1)

        print_epoch_details(
            num_epochs,
            epoch,
            train_confusion_matrix,
            train_loss_batches_list,
            val_confusion_matrix,
            loader_type,
        )
        if loader_type == "train":
            print(train_confusion_matrix)
        if loader_type == "validate":
            print(val_confusion_matrix)

# Persist the trained weights under the path stored on the model.
torch.save(LSTM_model.state_dict(), LSTM_model.model_save_path)


No GPU available, training on CPU.
Epoch: 1/30 | Train Avg Loss: 0.076 | Train Accuracy: 0.940 | Train F1: 0.370
[[57827.  1743.]
 [ 2048.  1112.]]
Epoch: 2/30 | Train Avg Loss: 0.062 | Train Accuracy: 0.927 | Train F1: 0.511
[[55808.  3762.]
 [  786.  2374.]]
Epoch: 3/30 | Train Avg Loss: 0.058 | Train Accuracy: 0.936 | Train F1: 0.560
[[56179.  3391.]
 [  615.  2545.]]
Epoch: 4/30 | Train Avg Loss: 0.057 | Train Accuracy: 0.942 | Train F1: 0.594
[[56459.  3111.]
 [  509.  2651.]]
Epoch: 5/30 | Train Avg Loss: 0.055 | Train Accuracy: 0.948 | Train F1: 0.626
[[56756.  2814.]
 [  436.  2724.]]
Epoch: 6/30 | Train Avg Loss: 0.054 | Train Accuracy: 0.951 | Train F1: 0.646
[[56897.  2673.]
 [  379.  2781.]]
Epoch: 7/30 | Train Avg Loss: 0.053 | Train Accuracy: 0.957 | Train F1: 0.678
[[57207.  2363.]
 [  327.  2833.]]
Epoch: 8/30 | Train Avg Loss: 0.053 | Train Accuracy: 0.961 | Train F1: 0.701
[[57401.  2169.]
 [  287.  2873.]]
Epoch: 9/30 | Train Avg Loss: 0.052 | Train Accuracy: 0.964 |

In [20]:
# Display the class weights currently bound to `labels_weights`.
# NOTE(review): the saved output below comes from execution In [20], i.e. before
# the configuration cell (In [21]) that hard-codes [0.1, 0.9]; re-run after a
# fresh "Restart & Run All" so the displayed value matches the loss weights.
labels_weights

[tensor(0.0530), tensor(0.9470)]