In [1]:
import sys
sys.path.append("../..")

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence

class SentimentAnalysis(nn.Module):
    """GRU-based sequence classifier for padded batches of token ids.

    Pipeline: embedding lookup -> GRU over the packed (padding-free)
    sequences -> small MLP head applied to the final hidden state of the
    last GRU layer. ``forward`` returns raw, un-normalised logits.

    Args:
        vocab_size: Number of rows in the embedding table.
        hidden_size: Size of the GRU hidden state.
        embedding_size: Dimension of each token embedding.
        inner_size: Width of the hidden layer in the classification head.
        num_classes: Number of logits produced per example.
    """

    def __init__(
        self,
        vocab_size: int,
        hidden_size: int,
        embedding_size: int,
        inner_size: int = 100,
        num_classes: int = 2,
    ):
        super().__init__()
        self._hidden_size = hidden_size
        self._embedding_size = embedding_size

        self.emb = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_size)

        self.gru = nn.GRU(
            input_size=self._embedding_size,
            hidden_size=self._hidden_size,
        )

        # Classification head: hidden state -> inner_size -> num_classes.
        self.seq = nn.Sequential(
            nn.Linear(hidden_size, inner_size),
            nn.ReLU(),
            nn.Linear(inner_size, num_classes),
        )

    def forward(self, x: torch.Tensor, lengths: torch.Tensor) -> torch.Tensor:
        """Compute class logits for a padded batch.

        Args:
            x: Token ids of shape (n_batch, seq), padded along ``seq``.
            lengths: Unpadded length of every sequence in the batch, as a
                tensor or a list of ints. Need not be sorted.

        Returns:
            Logits of shape (n_batch, num_classes).
        """
        # pack_padded_sequence only accepts tensor lengths that live on the
        # CPU, even when the model itself runs on an accelerator.
        if isinstance(lengths, torch.Tensor):
            lengths = lengths.cpu()

        # (n_batch, seq) -> (n_batch, seq, emb_dim)
        embedded = self.emb(x)

        # Packing strips the padding so the GRU's final hidden state is taken
        # at each sequence's true last token. batch_first=True lets the
        # packer consume the batch-major layout directly.
        packed = pack_padded_sequence(
            embedded,
            lengths,
            batch_first=True,
            enforce_sorted=False,
        )

        # h_n: (num_layers, n_batch, hidden_size), already restored to the
        # original batch order by the GRU.
        _, h_n = self.gru(packed)

        # Classify from the last layer's final hidden state.
        return self.seq(h_n[-1])

In [2]:
from tut.sentiment_analysis.helpers import load_sentiment_data, load_tokenizer, calc_accuracy

# Load the dataset. The helper hands back six values: data, labels and
# per-example sequence lengths for the training split, then the same three
# for the test split.
(train_data, train_labels, train_lengths,
 test_data, test_labels, test_lengths) = load_sentiment_data()

# The tokenizer's vocabulary size determines how many rows the model's
# embedding table needs.
tokenizer = load_tokenizer()
vocab_size = tokenizer.get_vocab_size()

In [4]:
from tqdm import tqdm

# --- Model hyperparameters ---
embedding_size = 40
hidden_size = 50

# Fixed seed so that weight initialisation (and therefore the whole run, as
# batches are visited in a fixed order) is repeatable after a kernel restart.
seed = 0

device = "cuda" if torch.cuda.is_available() else "cpu"

# Seed *before* the model is constructed; parameters are drawn at creation.
torch.manual_seed(seed)

model = SentimentAnalysis(
    vocab_size=vocab_size,
    hidden_size=hidden_size,
    embedding_size=embedding_size,
).to(device)

optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
loss_function = F.cross_entropy

# --- Training hyperparameters ---
epochs = 5
batch_size = 2000
eval_batch_size = 5000

for epoch in range(epochs):
    print(epoch, epochs)

    # calc_accuracy (implementation not visible in this notebook) runs between
    # epochs and may leave the module in eval mode, so explicitly switch back
    # to training mode at the start of every epoch.
    model.train()

    # One optimisation step per contiguous slice of the training set.
    for i in tqdm(range(0, len(train_data), batch_size)):
        inputs = train_data[i : i + batch_size].to(device)
        labels = train_labels[i : i + batch_size].to(device)
        # Lengths are deliberately NOT moved to `device`: the model feeds them
        # to pack_padded_sequence, which requires CPU lengths.
        lengths = train_lengths[i : i + batch_size]

        outputs = model(inputs, lengths)
        loss = loss_function(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # End-of-epoch evaluation on both splits.
    train_correct, train_total, train_loss = calc_accuracy(
        model=model,
        loss_function=loss_function,
        data=train_data,
        labels=train_labels,
        lengths=train_lengths,
        batch_size=eval_batch_size,
        device=device,
    )

    test_correct, test_total, test_loss = calc_accuracy(
        model=model,
        loss_function=loss_function,
        data=test_data,
        labels=test_labels,
        lengths=test_lengths,
        batch_size=eval_batch_size,
        device=device,
    )

    print(f"train: {train_correct / train_total: 0.2f}, {train_loss: 0.4f}")
    print(f"test: {test_correct / test_total: 0.2f}, {test_loss: 0.4f}")

0 5


100%|██████████| 20/20 [00:19<00:00,  1.03it/s]
 92%|█████████▎| 37/40 [00:09<00:00,  3.85it/s]