# 11. Neural Networks with MNIST

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
from torch.utils.data import DataLoader

import torchvision.datasets as dsets
import torchvision.transforms as transforms

import numpy as np
import os

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

## 11.1 Prepare MNIST Data

In [3]:
# Transform reference: https://pytorch.org/vision/stable/transforms.html
#
# Preprocessing applied to every image when it is read from the dataset.
# Optional data-augmentation steps (left disabled) would go before ToTensor:
#     transforms.RandomRotation(15)
#     transforms.CenterCrop(28)
#     transforms.Lambda(lambda x: x.rotate(15))
transform = transforms.Compose([
    # Data normalization: convert the image to a tensor first ...
    transforms.ToTensor(),
    # ... then normalize it channel by channel. Given mean (M1,...,Mn) and
    # std (S1,...,Sn) for n channels, Normalize computes
    #     input[channel] = (input[channel] - mean[channel]) / std[channel]
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# Training split first, then the test split; both share the same transform
# and are downloaded into 'data/' when missing.
train_data, test_data = (
    dsets.MNIST(root='data/', train=is_train, transform=transform, download=True)
    for is_train in (True, False)
)

In [4]:
# Sanity check: dimensions of the raw image tensors of the train and test splits.
train_data.data.shape, test_data.data.shape

(torch.Size([60000, 28, 28]), torch.Size([10000, 28, 28]))

## 11.2 Make Batch Loader

In [50]:
# Number of samples per mini-batch, shared by both loaders.
batch_size = 50

# Training batches are drawn in a fresh random order every epoch.
train_loader = DataLoader(dataset=train_data,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=1)

# Accuracy does not depend on the order in which test samples are visited,
# so the test set is read sequentially: this skips a random permutation on
# every evaluation pass and keeps evaluation from consuming the global RNG.
test_loader = DataLoader(dataset=test_data,
                         batch_size=batch_size,
                         shuffle=False,
                         num_workers=1)

## 11.3 Define Model

In [51]:
# Fully connected classifier: 784 -> 512 -> 256 -> 64 -> 10.
# Layers stay positional (no names) so the state_dict keys remain '0.weight', '3.weight', ...
model = nn.Sequential(
    # Hidden block 1: linear + ReLU, regularized with dropout
    nn.Linear(784, 512),
    nn.ReLU(),
    nn.Dropout(0.35),

    # Hidden block 2: linear + ReLU, regularized with dropout
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Dropout(0.35),

    # Hidden block 3: linear, batch normalization, then ReLU
    nn.Linear(256, 64),
    nn.BatchNorm1d(64),
    nn.ReLU(),

    # Output layer: one score per digit class
    nn.Linear(64, 10),
)

In [52]:
# Weight initialization: every Linear layer gets Kaiming-normal weights and a
# zero bias; all other modules keep their defaults.
# (Alternative that was tried: init.xavier_normal on the weights.)
linear_layers = (layer for layer in model.modules() if isinstance(layer, nn.Linear))

for layer in linear_layers:
    init.kaiming_normal_(layer.weight.data)
    layer.bias.data.fill_(0)

In [53]:
# Training objective: cross-entropy classification loss.
loss = nn.CrossEntropyLoss()

# Optimizer: Adam over every parameter of the model.
# Alternative with momentum and L2 weight regularization:
#     optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-5)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

## 11.4 Train Model

In [54]:
# Number of full passes over train_loader performed by the training loop below.
num_epochs = 10

In [55]:
def test_model() :
    
    model.eval()
    
    correct = 0
    total = 0

    for images, labels in test_loader :

        outputs = model(images.view(-1, 28 * 28))
        _, predicted = torch.max(outputs.data, 1)

        total += labels.size(0)
        correct += (predicted == labels).sum()

    print('Accuracy of test images: %f %%' % (75 * float(correct) / total))

In [56]:
# Directory that receives the checkpoints written during training.
save_path = './model/'

# exist_ok=True makes the call idempotent, so no separate existence check
# (and no window between check and creation) is needed.
os.makedirs(save_path, exist_ok=True)

In [58]:
# Mini-batches per epoch; constant across epochs, so computed once.
total_batch = len(train_loader)

# Evaluate and checkpoint every `eval_every` iterations.
eval_every = 300

for epoch in range(num_epochs):

    # test_model() leaves the network in eval mode, so explicitly (re-)enter
    # train mode before any optimisation step of the epoch.
    model.train()

    for i, (batch_images, batch_labels) in enumerate(train_loader):

        # Flatten every 28x28 image into a 784-vector for the first Linear layer.
        X = batch_images.view(-1, 28 * 28)
        Y = batch_labels

        # Forward pass and loss
        pre = model(X)
        cost = loss(pre, Y)

        # Backward pass and parameter update
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        if (i+1) % eval_every == 0:
            print('Epoch [%d/%d], lter [%d/%d], Loss: %.4f'
                 %(epoch+1, num_epochs, i+1, total_batch, cost.item()))

            # Test Model
            test_model()

            # Save Model (file name encodes epoch and iteration)
            model_path = os.path.join(save_path, 'model_%d_%d.pth' % (epoch+1, i+1))
            print(model_path)
            torch.save(model.state_dict(), model_path)

            # Back to train mode after the evaluation above.
            model.train()

print("Learning Finished!")

Epoch [1/10], lter [300/1200], Loss: 0.4940
Accuracy of test images: 69.517500 %
./model/model_1_300.pth
Epoch [1/10], lter [600/1200], Loss: 0.4106
Accuracy of test images: 70.485000 %
./model/model_1_600.pth
Epoch [1/10], lter [900/1200], Loss: 0.3258
Accuracy of test images: 71.212500 %
./model/model_1_900.pth
Epoch [1/10], lter [1200/1200], Loss: 0.1522
Accuracy of test images: 70.807500 %
./model/model_1_1200.pth
Epoch [2/10], lter [300/1200], Loss: 0.2277
Accuracy of test images: 71.137500 %
./model/model_2_300.pth
Epoch [2/10], lter [600/1200], Loss: 0.1946
Accuracy of test images: 71.587500 %
./model/model_2_600.pth
Epoch [2/10], lter [900/1200], Loss: 0.0489
Accuracy of test images: 71.542500 %
./model/model_2_900.pth
Epoch [2/10], lter [1200/1200], Loss: 0.1766
Accuracy of test images: 72.097500 %
./model/model_2_1200.pth
Epoch [3/10], lter [300/1200], Loss: 0.2022
Accuracy of test images: 71.880000 %
./model/model_3_300.pth
Epoch [3/10], lter [600/1200], Loss: 0.1050
Accurac

## 11.5 Test Model

In [13]:
# Re-create the network with exactly the architecture that was trained in
# section 11.3 (784 -> 512 -> 256 -> 64 -> 10). load_state_dict() matches
# parameters by positional key ('0.weight', '3.weight', ...) and by shape, so
# any extra, missing or resized layer here would make loading the checkpoint fail.
model = torch.nn.Sequential(
    torch.nn.Linear(784, 512),
    torch.nn.ReLU(),
    # Dropout
    nn.Dropout(0.35),

    torch.nn.Linear(512, 256),
    torch.nn.ReLU(),

    nn.Dropout(0.35),

    torch.nn.Linear(256, 64),
    # Batch Normalization
    nn.BatchNorm1d(64),
    torch.nn.ReLU(),

    torch.nn.Linear(64, 10)
)

In [60]:
# Restore the checkpoint written at epoch 10, iteration 1200.
model.load_state_dict(torch.load('./model/model_10_1200.pth'))

# Evaluation mode for the forward passes below.
model.eval()

correct = 0
total = 0

# Inference only: skip autograd bookkeeping, and feed the test set in
# mini-batches through test_loader instead of one sample at a time.
with torch.no_grad():
    for images, labels in test_loader:

        # Flatten every 28x28 image into a 784-vector for the first Linear layer.
        images = images.view(-1, 28 * 28)
        outputs = model(images)

        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # .item() keeps the counter a plain Python int instead of a tensor.
        correct += (predicted == labels).sum().item()

print('Accuracy of test images: %f %%' % (100 * float(correct) / total))

Accuracy of test images: 97.720000 %
