In [1]:
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.optim import Adam
from preprocessing import SentencesEmbeddingDataset

In [2]:
embedding_type = "glove"
batch_size = 32
NER_dataset = SentencesEmbeddingDataset(embedding_model_type=embedding_type)
train_loader, dev_loader = NER_dataset.get_data_loaders(batch_size=batch_size)

prepering glove...


In [68]:
class LSTM_NER_NN(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, num_classes, model_save_path):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True,
            dropout=0,
            bidirectional=True,
        )
        self.hidden2tag = nn.Sequential(
            nn.ReLU(), nn.Linear(self.hidden_dim*2, num_classes)
        )
        self.model_save_path = model_save_path
        self.num_classes = num_classes

    def forward(self, sentences_embeddings, sen_lengths):
        # pack
        packed_input = pack_padded_sequence(
            sentences_embeddings, sen_lengths, batch_first=True, enforce_sorted=False
        )
        lstm_packed_output, (ht, ct) = self.lstm(input=packed_input)
        # unpack
        lstm_out_padded, out_lengths = pad_packed_sequence(
            lstm_packed_output, batch_first=True
        )
        # reshape from sentences to words
        words_lstm_out = lstm_out_padded.view(-1, self.hidden_dim*2)
        # hidden -> tag score -> prediction -> loss
        tag_space = self.hidden2tag(words_lstm_out)
        tag_score = F.softmax(tag_space, dim=1)
        return tag_score

In [69]:
num_classes = 2
num_epochs = 5
hidden_dim = 64
embedding_dim = NER_dataset.VEC_DIM
lr = 0.001
model_save_path = (
    f"LSTM_model_stateDict_batchSize_{batch_size}_hidden_{hidden_dim}_lr_{lr}.pt"
)

LSTM_model = LSTM_NER_NN(
    embedding_dim=embedding_dim,
    num_classes=num_classes,
    hidden_dim=hidden_dim,
    model_save_path=model_save_path,
)

In [78]:
import numpy as np
import torch
from tqdm import tqdm

from utils import print_batch_details, print_epoch_details


def train_and_plot_LSTM(
    LSTM_model,
    train_loader,
    num_epochs: int,
    optimizer,
    loss_func,
    val_loader=None,
):

    # -------
    # GPU
    # -------
    # First checking if GPU is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available():
        print("Training on GPU.")
    else:
        print("No GPU available, training on CPU.")
    LSTM_model.to(device)

    # ----------------------------------
    # Epoch Loop
    # ----------------------------------
    for epoch in range(num_epochs):
        data_loaders = {"train": train_loader}
        if val_loader:
            data_loaders["validate"] = val_loader

        # prepare for evaluate
        num_classes = LSTM_model.num_classes
        train_confusion_matrix = np.zeros([num_classes, num_classes])
        val_confusion_matrix = None
        if val_loader:
            val_confusion_matrix = np.zeros([num_classes, num_classes])
        train_loss_batches_list = []

        for loader_type, data_loader in data_loaders.items():
            num_of_batches = len(data_loader)

            for batch_num, (sentences, labels, sen_lengths) in enumerate(
                tqdm(data_loader)
            ):
                # if training on gpu
                sentences, labels, sen_lengths = (
                    sentences.to(device),
                    labels.to(device),
                    sen_lengths.to(device),
                )

                # forward
                outputs = LSTM_model(sentences, sen_lengths)

                # labels
                packed_labels = pack_padded_sequence(
                    labels, sen_lengths, batch_first=True, enforce_sorted=False
                )
                unpacked_labels, labels_lengths = pad_packed_sequence(
                    packed_labels, batch_first=True
                )
                labels_tensor = torch.tensor(unpacked_labels.view(-1)).long()
                labels_one_hot = nn.functional.one_hot(labels_tensor, num_classes=num_classes)

                # loss
                loss = loss_func(outputs, labels_one_hot.float())

                if loader_type == "train":
                    train_loss_batches_list.append(loss.detach().cpu())
                    # backprop
                    loss.backward(retain_graph=True)
                    optimizer.step()
                    optimizer.zero_grad()

                # predictions
                preds = outputs.argmax(dim=-1).clone().detach().cpu()
                y_true = np.array(labels.cpu().view(-1).int())
                y_pred = np.array(preds.view(-1))
                n_preds = len(y_pred)
                for i in range(n_preds):
                    if loader_type == "train":
                        train_confusion_matrix[y_true[i]][y_pred[i]] += 1
                    if loader_type == "validate":
                        val_confusion_matrix[y_true[i]][y_pred[i]] += 1
                # print
                if batch_num % 50 == 0:
                    print_batch_details(
                        num_of_batches,
                        batch_num,
                        loss,
                        train_confusion_matrix,
                        val_confusion_matrix,
                        loader_type,
                    )

            print_epoch_details(
                num_epochs,
                epoch,
                train_confusion_matrix,
                train_loss_batches_list,
                val_confusion_matrix,
                loader_type,
            )
    torch.save(LSTM_model.state_dict(), LSTM_model.model_save_path)


In [79]:
optimizer = Adam(params=LSTM_model.parameters(), lr=lr)
loss_func = nn.CrossEntropyLoss()

train_and_plot_LSTM(
    LSTM_model=LSTM_model,
    train_loader=train_loader,
    num_epochs=num_epochs,
    val_loader=dev_loader,
    optimizer=optimizer,
    loss_func=loss_func,
)

No GPU available, training on CPU.


  labels_tensor = torch.tensor(unpacked_labels.view(-1)).long()
  0%|          | 0/107 [00:00<?, ?it/s]

sentences_embeddings: torch.Size([32, 41, 200])
sentences_embeddings[30]: tensor([[-0.5888,  0.0998, -0.4117,  ...,  0.5522, -0.1015,  0.8154],
        [-0.0948, -0.0337, -0.1041,  ..., -0.1714,  0.5494,  0.6107],
        [ 0.2641,  0.1720,  0.0828,  ...,  0.3114, -0.0886, -0.0814],
        ...,
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]])
sen_lengths: torch.Size([32])
tensor([14,  9, 38, 16, 28,  6, 21, 31, 30, 11, 17, 19, 23, 25, 13, 24, 17, 32,
        13, 10, 16, 26,  7,  9, 21, 23, 19, 10,  3, 22, 15, 12])
packed_input:
packed_input[0]: tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,
          0.0000e+00,  0.0000e+00],
        [-3.1452e-01,  2.2976e-01,  5.0144e-01,  ...,  9.6154e-01,
         -5.0519e-01, -3.9491e-01],
        [ 5.6404e-02,  4.9536e-01,  1.8439e-01,  ...,  6.3598e-01,
         -1.8




RuntimeError: Expected floating point type for target with class probabilities, got Long