<a href="https://colab.research.google.com/github/harryypham/MyMLPractice/blob/main/nlp/RNN_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets
from torch.utils.data import DataLoader


In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
input_size = 28
sequence_length = 28
num_layers = 2
hidden_size = 256
num_classes = 10
batch_size = 64
lr = 3e-4
num_epochs = 5

In [3]:
train_set = datasets.MNIST(root="data", train=True, download=True, transform=transforms.ToTensor())
test_set = datasets.MNIST(root="data", train=False, download=True, transform=transforms.ToTensor())

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True)

In [4]:
class RNN(nn.Module):
  def __init__(self, input_size, sequence_length, hidden_size, num_layers, num_classes):
    super().__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
    self.fc = nn.Linear(hidden_size * sequence_length, num_classes)

  def forward(self, x):
    h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)

    out, _ = self.rnn(x, h0)
    out = out.reshape(out.shape[0], -1)
    out = self.fc(out)

    return out


In [5]:
def train(model, criterion, optimizer, data_loader, num_epochs, device):
  model.train()
  losses = []
  for epoch in range(1, num_epochs+1):
    print(f"Epoch {epoch}: ")
    pbar = tqdm(data_loader, leave=True)
    for batch_idx, (input, target) in enumerate(pbar):
      input = input.to(device).squeeze(1)
      target = target.to(device)

      output = model(input)

      loss = criterion(output, target)
      losses.append(loss.item())
      optimizer.zero_grad()
      loss.backward()

      optimizer.step()

      pbar.set_postfix({"Loss": round(sum(losses)/len(losses), 4)})

@torch.no_grad()
def check_accuracy(model, data_loader, device):
  correct = total = 0

  model.eval()
  for input, target in data_loader:
    input = input.to(device).squeeze(1)
    target = target.to(device)

    output = model(input)

    _, preds = output.max(1)
    correct += (preds == target).sum()
    total += target.size(0)

  print(f"Accuracy: {correct/total*100:.2f}")
  model.train()


In [6]:
model = RNN(input_size, sequence_length, hidden_size, num_layers, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

train(model, criterion, optimizer, train_loader, num_epochs, device)

Epoch 1: 


100%|██████████| 938/938 [00:10<00:00, 89.45it/s, Loss=0.312]


Epoch 2: 


100%|██████████| 938/938 [00:10<00:00, 92.99it/s, Loss=0.222]


Epoch 3: 


100%|██████████| 938/938 [00:10<00:00, 92.60it/s, Loss=0.178]


Epoch 4: 


100%|██████████| 938/938 [00:09<00:00, 93.94it/s, Loss=0.152]


Epoch 5: 


100%|██████████| 938/938 [00:09<00:00, 94.01it/s, Loss=0.134]


In [7]:
check_accuracy(model, test_loader, device)

Accuracy: 98.14
