# 030. mnist 손글씨 인식 - LeNet 

In [5]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn, optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn.functional as F

torch.manual_seed(123)

<torch._C.Generator at 0x1a6c13559d0>

In [6]:
# CUDA for PyTorch
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [13]:
# load mnist data
TRAIN_BATCH_SIZE = 128
TEST_BATCH_SIZE = 1000

 # Fetch train data: total 60000 samples
train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.Resize((32, 32)),
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=TRAIN_BATCH_SIZE, shuffle=True)

# Fetch test data: total 10000 samples
validation_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, download=True,
                       transform=transforms.Compose([
                           transforms.Resize((32, 32)),
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                    ])),
        batch_size=TEST_BATCH_SIZE, shuffle=True)

In [14]:
class LeNet(nn.Module):
    def __init__(self):
        super().__init__()
        # input channel = 1, output channel = 6, kernel_size = 5
        # input size = (32, 32), output size = (28, 28)
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5)
        # input channel = 6, output channel = 16, kernel_size = 5
        # input size = (14, 14), output size = (10, 10)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # input dim = 16*5*5, output dim = 120
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        # input dim = 120, output dim = 84
        self.fc2 = nn.Linear(120, 84)
        # input dim = 84, output dim = 10
        self.fc3 = nn.Linear(84, 10)
        
    def forward(self, x):
        # pool size = 2
        # input size = (28, 28), output size = (14, 14), output channel = 6
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        # pool size = 2
        # input size = (10, 10), output size = (5, 5), output channel = 16
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
       # flatten as one dimension
        x = x.view(x.size()[0], -1)
        # input dim = 16*5*5, output dim = 120
        x = F.relu(self.fc1(x))
        # input dim = 120, output dim = 84
        x = F.relu(self.fc2(x))
        # input dim = 84, output dim = 10
        x = F.relu(self.fc3(x))     
        x = self.fc1(x)
        return x

In [15]:
model = LeNet()
model = model.to(device)

### Model Summary

In [16]:
model.train()

LeNet(
  (conv1): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

### Loss Function

In [17]:
criterion = nn.CrossEntropyLoss()
# loss 를 gpu 로 transfer
criterion = criterion.to(device)

In [18]:
lr = 0.01
momentum=0.5
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

In [19]:
LOSS = []
ACCURACY = []

EPOCHS = 5

for epoch in range(EPOCHS):
    cost = 0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        
        optimizer.zero_grad()
    
        z = model(x)
        loss = criterion(z, y)
        loss.backward()
        optimizer.step()
        cost += loss.data
        
    LOSS.append(cost)
        
    correct = 0
    for x, y in validation_loader:
        x, y = x.to(device), y.to(device)
        
        z = model(x)
        _, label = torch.max(z, 1)
        correct += (label == y).sum().item()
        
    accuracy = 100 * (correct / len(validation_dataset))
    ACCURACY.append(accuracy)
    
    print(f'epoch {epoch} ==> train loss: {cost}, valididation accuracy: {accuracy}')

RuntimeError: Given groups=1, weight of size 16 6 5 5, expected input[128, 16, 14, 14] to have 6 channels, but got 16 channels instead

In [None]:
plt.figure(figsize=(8, 4))

fig, ax1 = plt.subplots()
color = 'tab:red'
ax1.plot(LOSS, color=color)
ax1.set_xlabel('epoch', color=color)
ax1.set_ylabel('total loss', color=color)

color='tab:blue'
ax2 = ax1.twinx()
ax2.plot(ACCURACY, color=color)
ax2.set_ylabel('accuracy', color=color)

In [None]:
%matplotlib inline

plt.figure(figsize=(7, 7))
count = 0

for x, y in torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=1):
    if count > 8:
        break
    x = x.to(device)
    z = model(x)
    _, yhat = torch.max(z, 1)
    yhat = yhat.to("cpu")
    if yhat == y:
        plt.subplot(3, 3, count+1)
        plt.imshow(x.to("cpu").reshape(28, 28), cmap='gray', interpolation='none')  
        plt.title("Class: {}, Predicted: {}".format(y.item(), yhat.item()))
        plt.tight_layout()
        plt.xticks([])
        plt.yticks([])
        count += 1

plt.figure(figsize=(7, 7))
count = 0

for x, y in torch.utils.data.DataLoader(dataset=validation_dataset, batch_size=1):
    if count > 8:
        break
    x = x.to(device)
    z = model(x)
    _, yhat = torch.max(z, 1)
    yhat = yhat.to("cpu")

    if yhat != y:
        plt.subplot(3, 3, count+1)
        plt.imshow(x.to("cpu").reshape(28, 28), cmap='gray', interpolation='none')  
        plt.title("Class: {}, Predicted: {}".format(y.item(), yhat.item()))
        plt.tight_layout()
        plt.xticks([])
        plt.yticks([])
        count += 1