<a href="https://colab.research.google.com/github/Vincentius474/pytorch_lstm/blob/main/pytorch_lstm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import os
import torch
import numpy as np
import pandas as pd

from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [4]:
batch_size = 100
training_data = datasets.FashionMNIST(root="../fashion_mnist", train=True, transform=transforms.ToTensor(), download=True)
test_data = datasets.FashionMNIST(root="../fashion_mnist", train=False, transform=transforms.ToTensor(), download=True)

train_data_loader = DataLoader(training_data, batch_size=batch_size)
test_data_loader = DataLoader(test_data, batch_size=batch_size)

100%|██████████| 26.4M/26.4M [00:01<00:00, 13.6MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 230kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.77MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 17.9MB/s]


-- define hyperparameters

In [10]:
sequence_len = 28
input_len = 28
hidden_size = 128
num_layers = 2
num_classes = 10
num_epochs = 5
learning_rate = 0.01

-- create the model

In [34]:
class LSTM(nn.Module):
  def __init__(self, input_len, hidden_size, num_classes, num_layers):
    super(LSTM, self).__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.lstm = nn.LSTM(input_len, hidden_size, num_layers, batch_first=True)
    self.output_layer = nn.Linear(hidden_size, num_classes)

  def forward(self, X):
    hidden_states = torch.zeros(self.num_layers, X.size(0), self.hidden_size)
    cell_states = torch.zeros(self.num_layers, X.size(0), self.hidden_size)
    out, _ = self.lstm(X, (hidden_states, cell_states))
    out = self.output_layer(out[:, -1, :])
    return out

-- instantiate the model

In [35]:
model= LSTM(input_len, hidden_size, num_classes, num_layers)
print(model)

LSTM(
  (lstm): LSTM(28, 128, num_layers=2, batch_first=True)
  (output_layer): Linear(in_features=128, out_features=10, bias=True)
)


-- define loss function (Cross Entropy) and an optimizer (SDG)

In [36]:
loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

-- define the training function

In [37]:
def train(num_epochs, model, train_data_loader, loss_func):
  total_steps = len(train_data_loader)

  for epoch in range(num_epochs):
    for batch,  (images, label) in enumerate(train_data_loader):
      images = images.reshape(-1, sequence_len, input_len)

      output = model(images)
      loss = loss_func(output, label)

      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      if (batch + 1)%100 == 0:
        print(f"Epoch : {epoch + 1}; Batch: {batch} / {total_steps}; Loss: {loss.item():>4f}")

-- train the model

In [38]:
train(num_epochs, model, train_data_loader, loss_func)

Epoch : 1; Batch: 99 / 600; Loss: 2.303062
Epoch : 1; Batch: 199 / 600; Loss: 2.296859
Epoch : 1; Batch: 299 / 600; Loss: 2.294654
Epoch : 1; Batch: 399 / 600; Loss: 2.290643
Epoch : 1; Batch: 499 / 600; Loss: 2.291373
Epoch : 1; Batch: 599 / 600; Loss: 2.287902
Epoch : 2; Batch: 99 / 600; Loss: 2.285956
Epoch : 2; Batch: 199 / 600; Loss: 2.282296
Epoch : 2; Batch: 299 / 600; Loss: 2.270540
Epoch : 2; Batch: 399 / 600; Loss: 2.270716
Epoch : 2; Batch: 499 / 600; Loss: 2.270984
Epoch : 2; Batch: 599 / 600; Loss: 2.257068
Epoch : 3; Batch: 99 / 600; Loss: 2.251699
Epoch : 3; Batch: 199 / 600; Loss: 2.247506
Epoch : 3; Batch: 299 / 600; Loss: 2.214070
Epoch : 3; Batch: 399 / 600; Loss: 2.211253
Epoch : 3; Batch: 499 / 600; Loss: 2.205462
Epoch : 3; Batch: 599 / 600; Loss: 2.167673
Epoch : 4; Batch: 99 / 600; Loss: 2.133408
Epoch : 4; Batch: 199 / 600; Loss: 2.118080
Epoch : 4; Batch: 299 / 600; Loss: 1.996899
Epoch : 4; Batch: 399 / 600; Loss: 1.941476
Epoch : 4; Batch: 499 / 600; Loss: 1

In [39]:
test_images, test_labels = next(iter(test_data_loader))
test_labels

tensor([9, 2, 1, 1, 6, 1, 4, 6, 5, 7, 4, 5, 7, 3, 4, 1, 2, 4, 8, 0, 2, 5, 7, 9,
        1, 4, 6, 0, 9, 3, 8, 8, 3, 3, 8, 0, 7, 5, 7, 9, 6, 1, 3, 7, 6, 7, 2, 1,
        2, 2, 4, 4, 5, 8, 2, 2, 8, 4, 8, 0, 7, 7, 8, 5, 1, 1, 2, 3, 9, 8, 7, 0,
        2, 6, 2, 3, 1, 2, 8, 4, 1, 8, 5, 9, 5, 0, 3, 2, 0, 6, 5, 3, 6, 7, 1, 8,
        0, 1, 4, 2])

In [40]:
test_output = model(test_images.view(-1, 28, 28))

In [41]:
predicted = torch.max(test_output, 1)[1]
predicted

tensor([7, 2, 1, 1, 4, 1, 9, 2, 7, 7, 9, 7, 7, 1, 4, 1, 9, 2, 9, 0, 2, 7, 7, 7,
        1, 9, 4, 0, 9, 0, 2, 0, 1, 0, 4, 3, 7, 7, 7, 9, 0, 1, 0, 7, 4, 7, 4, 1,
        2, 4, 2, 9, 7, 2, 4, 2, 9, 2, 9, 1, 7, 7, 2, 7, 1, 1, 3, 3, 7, 2, 7, 7,
        2, 0, 4, 1, 1, 4, 9, 2, 1, 2, 7, 9, 7, 0, 3, 4, 0, 2, 7, 1, 4, 7, 1, 9,
        3, 1, 4, 2])

In [43]:
correct = [1 for i in range(100) if predicted[i] == test_labels[i]]
percentage_correct = sum(correct)/100
print(percentage_correct)

0.41


In [45]:
def test_loop(data_loader, model, loss_func, optimizer):
  size = len(data_loader.dataset)
  num_batches = len(data_loader)
  test_loss, correct = 0, 0

  with torch.no_grad():
    for X, y in data_loader:
      X = X.reshape(-1, 28, 28)
      pred = model(X)
      test_loss += loss_func(pred, y).item()
      correct += (pred.argmax(1) == y).type(torch.float).sum().item()

  test_loss /= num_batches
  correct /= size
  print(f"Test Error:\n Accuracy: {(100*correct):>0.1f}%, Avg Loss: {test_loss:>8f}\n")
  return 100*correct

In [49]:
test_loop(test_data_loader, model, loss_func, optim.Adam(model.parameters()))

Test Error:
 Accuracy: 43.9%, Avg Loss: 1.499860



43.89