In [2]:
import torch
import pickle
from torch import nn
from torch.utils.data import DataLoader

print("PyTorch version:")
print(torch.__version__)
print("GPU Detected:")
# print(torch.cuda.is_available())
print(torch.backends.mps.is_available())

# defining a shortcut function for later:
import os

# gpu = torch.device("cuda:0")
gpu = torch.device("mps")

PyTorch version:
2.2.0
GPU Detected:
True


In [3]:
with open("./data/train_text", "rb") as f:
    train_text = pickle.load(f)
with open("./data/test_text", "rb") as f:
    test_text = pickle.load(f)

train_text_data = DataLoader(train_text, batch_size=1, shuffle=True)
test_text_data = DataLoader(test_text, batch_size=1, shuffle=True)

In [4]:
print(len(train_text_data.dataset))
print(len(test_text_data.dataset))

571
142


In [5]:
# check batch dimension
batch_size = train_text_data.batch_size
for data, label in train_text_data:
    print("shape: {0}".format(data.size()))
    break
sequence_length = data.size()[1]

# set batch first true. 

shape: torch.Size([1, 807, 300])


In [19]:
class RNNClassifier(nn.Module):
    def __init__(self, hidden_size, input_size, num_layers, num_classes, dropout, activation_fn):
        super(RNNClassifier, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, num_layers, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        self.nonlinearity = activation_fn

    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.gru.num_layers, x.size(0), self.gru.hidden_size).to(x.device)
        
        # Forward propagate GRU
        # out shape: (batch_size, seq_len, hidden_size)
        out, _ = self.gru(x, h0)
        
        # Select the output of the last time step
        # out shape: (batch_size, hidden_size)
        out = out[:, -1, :]
        
        # Forward propagate through fully connected layer
        # out shape: (batch_size, output_size)
        out = self.fc(out)


        out_distribution = nn.functional.log_softmax(out, dim=-1)
        return out_distribution


def train(
    train_dataloader, test_dataloader, nll_criterion, num_epochs, ffnn, ffnn_optimizer
):
    # A counter for the number of gradient updates we've performed.
    num_iter = 0

    # Iterate `num_epochs` times.
    for epoch in range(num_epochs):
        print("Starting epoch {}".format(epoch + 1))
        # Iterate over the train_dataloader, unpacking the images and labels
        for data, labels in train_dataloader:
            # If we're using the GPU, move reshaped_images and labels to the GPU.
            if gpu:
                data = data.to(gpu)
                labels = labels.to(gpu)

            # Run the forward pass through the model to get predicted log distribution.
            predicted = ffnn(data)

            # Calculate the loss
            batch_loss = nll_criterion(predicted, labels)

            # Clear the gradients as we prepare to backprop.
            ffnn_optimizer.zero_grad()

            # Backprop (backward pass), which calculates gradients.
            batch_loss.backward()

            # Take a gradient step to update parameters.
            ffnn_optimizer.step()

            # Increment gradient update counter.
            num_iter += 1

            # Calculate test set loss and accuracy every 500 gradient updates
            # It's standard to have this as a separate evaluate function, but
            # we'll place it inline for didactic purposes.
            if num_iter % 500 == 0:
                # Set model to eval mode, which turns off dropout.
                ffnn.eval()
                # Counters for the num of examples we get right / total num of examples.
                num_correct = 0
                total_examples = 0
                total_test_loss = 0

                with torch.no_grad():
                    # Iterate over the test dataloader
                    for test_data, test_labels in test_dataloader:

                        # If we're using the GPU, move tensors to the GPU.
                        if gpu:
                            test_data = test_data.to(gpu)
                            test_labels = test_labels.to(gpu)

                        # Run the forward pass to get predicted distribution.
                        predicted = ffnn(test_data)

                        # Calculate loss for this test batch. This is averaged, so multiply
                        # by the number of examples in batch to get a total.
                        total_test_loss += nll_criterion(
                            predicted, test_labels
                        ).data * test_labels.size(0)

                        # Get predicted labels (argmax)
                        _, predicted_labels = torch.max(predicted.data, 1)

                        # Count the number of examples in this batch
                        total_examples += test_labels.size(0)

                        # Count the total number of correctly predicted labels.
                        # predicted == labels generates a ByteTensor in indices where
                        # predicted and labels match, so we can sum to get the num correct.
                        num_correct += torch.sum(predicted_labels == test_labels.data)
                accuracy = 100 * num_correct / total_examples
                average_test_loss = total_test_loss / total_examples
                print(
                    "Iteration {}. Test Loss {}. Test Accuracy {}.".format(
                        num_iter, average_test_loss, accuracy
                    )
                )
                # Set the model back to train mode, which activates dropout again.
                ffnn.train()

In [20]:
activation_fn = nn.ReLU()
model = RNNClassifier(
    hidden_size=128, input_size=300, num_layers=1, num_classes=2, dropout=0.5, activation_fn=activation_fn
)
nll_criterion = nn.NLLLoss()
optimiser = torch.optim.Adam(model.parameters())
model.to(gpu)

RNNClassifier(
  (gru): GRU(300, 128, batch_first=True, dropout=0.5)
  (fc): Linear(in_features=128, out_features=2, bias=True)
  (nonlinearity): ReLU()
)

In [21]:
num_epochs = 5
train(
    train_text_data, 
    test_text_data, 
    nll_criterion, 
    num_epochs, 
    model, 
    optimiser
)


Starting epoch 1
Iteration 500. Test Loss 0.7082117199897766. Test Accuracy 53.52112579345703.
Starting epoch 2
Iteration 1000. Test Loss 0.714120090007782. Test Accuracy 53.52112579345703.
Starting epoch 3
Iteration 1500. Test Loss 0.700814962387085. Test Accuracy 52.81690216064453.
Starting epoch 4
Iteration 2000. Test Loss 0.7061221599578857. Test Accuracy 53.52112579345703.
Starting epoch 5
Iteration 2500. Test Loss 0.6358367204666138. Test Accuracy 71.12676239013672.
