The goal of this notebook is to implement Long Short-Term memory. See:

S. Hochreiter and J. Schmidhuber, "Long Short-Term Memory," in Neural Computation, vol. 9, no. 8, pp. 1735-1780, 15 Nov. 1997, doi: 10.1162/neco.1997.9.8.1735.

I implemented this in PyTorch borrowing parameters from: https://machinelearningmastery.com/sequence-classification-lstm-recurrent-neural-networks-python-keras/

In [1]:
import numpy as np
import keras
import torch

from keras.datasets import imdb
from keras.preprocessing import sequence
from letsbuildmodels.devices import get_device
from torch.nn import BCELoss
from torch.nn import Embedding
from torch.nn import Linear
from torch.nn import LSTM
from torch.nn import Module
from torch.nn import Sigmoid
from torch.optim import Adam
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset

In [2]:
# load the dataset but only keep the top n words, zero the rest
top_words = 5000
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=top_words)

In [3]:
# truncate and pad input sequences
max_review_length = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_review_length)
X_test = sequence.pad_sequences(X_test, maxlen=max_review_length)

In [4]:
# Create dataloaders
device = "cpu"
batch_size = 64

train_dataset = TensorDataset(
    torch.tensor(X_train, dtype=torch.int32, device=device),
    torch.tensor(y_train, dtype=torch.float32, device=device)
)
test_dataset = TensorDataset(
    torch.tensor(X_test, dtype=torch.int32, device=device),
    torch.tensor(y_test, dtype=torch.float32, device=device)
)

trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
testloader = DataLoader(train_dataset, batch_size=batch_size)

In [5]:
# create the model
embedding_vecor_length = 32
class Sentiment(Module):
    def __init__(self):
        super(Sentiment, self).__init__()
        self.embedding = Embedding(top_words, embedding_vecor_length)
        self.lstm = LSTM(embedding_vecor_length, 100, batch_first=True)
        self.linear = Linear(100, 1)
        self.sigmoid = Sigmoid()

    def forward(self, x):
        embedded = self.embedding(x)
        _, (hidden, _) = self.lstm(embedded)
        hidden = hidden[0, :, :]
        outputs = self.linear(hidden)
        logits = self.sigmoid(outputs)
        return torch.flatten(logits)
        
model = Sentiment()
model.to(device)
print(model)

Sentiment(
  (embedding): Embedding(5000, 32)
  (lstm): LSTM(32, 100, batch_first=True)
  (linear): Linear(in_features=100, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


In [6]:
def to_prediction(outputs):
    return (outputs > 0.5).float()
    
def fit():
    criterion = BCELoss()
    optimizer = Adam(model.parameters())
    model.train()
    for epoch in range(10):
        epoch_loss = 0.0
        correct_predictions = 0
        count = 0
        batch = 0
        for inputs, labels in trainloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            predictions = to_prediction(outputs)
            correct_predictions += (predictions == labels).sum().item()
            count += inputs.size()[0]
            batch += 1
            print(f"Batch {batch}, size = {inputs.size()[0]}", end = "\r")
        avg_loss = epoch_loss / count
        avg_accuracy = correct_predictions / count
        print(f"Epoch {epoch+1}, Loss: {avg_loss}, Accuracy: {avg_accuracy:.2%}")

fit()

Epoch 1, Loss: 0.009957145146131515, Accuracy: 62.44%
Epoch 2, Loss: 0.008446183835268021, Accuracy: 72.88%
Epoch 3, Loss: 0.007413450375795365, Accuracy: 77.23%
Epoch 4, Loss: 0.006604318021535873, Accuracy: 80.80%
Epoch 5, Loss: 0.0063955654078722, Accuracy: 81.90%
Epoch 6, Loss: 0.006536234651207924, Accuracy: 81.30%
Epoch 7, Loss: 0.005538897587060929, Accuracy: 84.76%
Epoch 8, Loss: 0.006512709921002388, Accuracy: 81.09%
Epoch 9, Loss: 0.00595564430475235, Accuracy: 83.32%
Epoch 10, Loss: 0.004802551005482674, Accuracy: 87.43%


In [7]:
# Final evaluation of the model
def evaluate():
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        batch = 0
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            predicted = (outputs > 0.5).float()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            batch += 1
            print(f"Batch {batch}, size = {inputs.size()[0]}", end = "\r")
        print(f"Accuracy: {100 * correct / total}%")

evaluate()

Accuracy: 89.052% 40
