<a href="https://colab.research.google.com/github/gondow86/lstm_cnn_ensemble/blob/dev/LSTM_tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Show which device was selected so the reader can confirm whether the GPU is in use.
print(device)

cuda


In [None]:
# --- Hyperparameters ---

# Input layout: every sample is fed to the network as `sequence_length`
# time steps of `input_size` features each.
sequence_length = 28
input_size = 28

# Network shape.
hidden_size = 128
num_layers = 2
num_classes = 10

# Optimisation settings.
batch_size = 100
num_epochs = 2
learning_rate = 0.001

In [None]:
from torchvision.transforms.transforms import ToTensor
# MNIST training split; downloaded into ./data on first use and converted
# to tensors on access.
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# MNIST test split, read from the same root directory (no download requested).
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [None]:
# Batch both splits with the same batch size; only the training split is shuffled.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)

test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
class RNN(nn.Module):
  """Stacked `nn.RNN` followed by a linear classifier on the last time step.

  Args:
    input_size: Number of features per time step.
    hidden_size: Width of the hidden state of each recurrent layer.
    num_layers: Number of stacked recurrent layers.
    num_classes: Number of output logits produced per sample.
  """

  def __init__(self, input_size, hidden_size, num_layers, num_classes) -> None:
    super().__init__()
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    # batch_first=True: inputs and outputs are laid out as (batch, seq, feature).
    self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)

    self.fc = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Classify a batch of sequences.

    Args:
      x: Tensor of shape (batch, seq_len, input_size).

    Returns:
      Tensor of shape (batch, num_classes) holding unnormalised class scores.
    """
    # Zero initial hidden state, created directly on the input's device and
    # with the input's dtype so the module does not depend on a global
    # `device` and works wherever the model and its input live.
    h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                     device=x.device, dtype=x.dtype)

    out, _ = self.rnn(x, h0)

    # Keep only the top layer's output at the final time step.
    out = out[:, -1, :]

    out = self.fc(out)

    return out

In [None]:
# Build the network and move its parameters to the selected device.
model = RNN(input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            num_classes=num_classes)
model = model.to(device)

# Adam updates every model parameter; cross-entropy is the training loss.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [None]:
# Train for `num_epochs` passes over `train_loader`, logging the loss of the
# current batch every 100 steps.
n_total_steps = len(train_loader)
model.train()  # make training mode explicit in case the model was switched to eval
for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    # Reshape each batch to (batch, sequence_length, input_size) before the forward pass.
    images = images.reshape(-1, sequence_length, input_size).to(device)
    labels = labels.to(device)

    # Forward pass and loss.
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward pass: clear stale gradients, backpropagate, update weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (i + 1) % 100 == 0:
      # Fixed: a stray backtick before {num_epochs} used to appear in every log line.
      print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

Epoch [1/`2], Step [100/600], Loss: 1.2537
Epoch [1/`2], Step [200/600], Loss: 0.7315
Epoch [1/`2], Step [300/600], Loss: 0.4889
Epoch [1/`2], Step [400/600], Loss: 0.3594
Epoch [1/`2], Step [500/600], Loss: 0.4233
Epoch [1/`2], Step [600/600], Loss: 0.4498
Epoch [2/`2], Step [100/600], Loss: 0.1918
Epoch [2/`2], Step [200/600], Loss: 0.2962
Epoch [2/`2], Step [300/600], Loss: 0.3595
Epoch [2/`2], Step [400/600], Loss: 0.1255
Epoch [2/`2], Step [500/600], Loss: 0.2207
Epoch [2/`2], Step [600/600], Loss: 0.2936


In [None]:
from IPython.testing import test
# Evaluate on the test split. Gradients are not needed, so autograd tracking
# is disabled for the whole pass.
model.eval()  # inference mode for any layers that behave differently in training
with torch.no_grad():
  n_correct = 0
  n_samples = 0
  for images, labels in test_loader:
    images = images.reshape(-1, sequence_length, input_size).to(device)
    labels = labels.to(device)
    outputs = model(images)

    # Predicted class = index of the largest score along the class dimension.
    _, predicted = torch.max(outputs, 1)
    n_samples += labels.size(0)
    n_correct += (predicted == labels).sum().item()

  # Report once, after every batch has been counted. Previously this ran
  # inside the loop and printed a running accuracy for each batch.
  acc = 100.0 * n_correct / n_samples
  print(f'Accuracy of the network on the {n_samples} test images: {acc} %')
    

Accuracy of the network on the 10000 test images: 96.0 %
Accuracy of the network on the 10000 test images: 95.0 %
Accuracy of the network on the 10000 test images: 94.66666666666667 %
Accuracy of the network on the 10000 test images: 95.25 %
Accuracy of the network on the 10000 test images: 94.8 %
Accuracy of the network on the 10000 test images: 94.0 %
Accuracy of the network on the 10000 test images: 93.57142857142857 %
Accuracy of the network on the 10000 test images: 93.75 %
Accuracy of the network on the 10000 test images: 94.0 %
Accuracy of the network on the 10000 test images: 94.0 %
Accuracy of the network on the 10000 test images: 94.0 %
Accuracy of the network on the 10000 test images: 93.83333333333333 %
Accuracy of the network on the 10000 test images: 93.53846153846153 %
Accuracy of the network on the 10000 test images: 93.85714285714286 %
Accuracy of the network on the 10000 test images: 93.73333333333333 %
Accuracy of the network on the 10000 test images: 93.6875 %
Accur