<a href="https://colab.research.google.com/github/hewanshrestha/Visual-Computing-Exercises/blob/master/MNIST_with_Stacked_Autoencoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Loading Packages**

In [1]:
import torch
import numpy as np
import torchvision
from PIL import Image
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import torchvision.transforms as transforms

**Loading Data**

In [2]:
# Number of images per mini-batch; later cells also read this value.
BatchSize = 1000

# Per-sample preprocessing: convert each image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# Arguments shared by the train and test splits.
_dataset_args = dict(root='./MNIST', download=True, transform=transform)
_loader_args = dict(batch_size=BatchSize, num_workers=4)

# Training split, reshuffled on every pass.
trainset = torchvision.datasets.MNIST(train=True, **_dataset_args)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **_loader_args)

# Test split, iterated in a fixed order.
testset = torchvision.datasets.MNIST(train=False, **_dataset_args)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **_loader_args)

# Human-readable name for each digit label, indexed by the label value.
classes = tuple('zero one two three four five six seven eight nine'.split())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./MNIST/MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./MNIST/MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./MNIST/MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./MNIST/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/MNIST/raw



  cpuset_checked))


In [3]:
# Select the compute device: "cuda" when PyTorch reports a usable GPU,
# otherwise "cpu". `use_gpu` and `device` are read by later cells.
use_gpu = torch.cuda.is_available()
device = "cuda" if use_gpu else "cpu"
print('GPU is available!' if use_gpu else 'GPU is not available!')

GPU is available!


**Defining Autoencoder**

In [8]:
class autoencoder(nn.Module):
    """Fully connected autoencoder with a single hidden layer.

    The encoder maps a 784-element input (28*28) to 400 features through
    a Tanh; the decoder maps those 400 features back to 784 values through
    a Sigmoid.
    """

    def __init__(self):
        super().__init__()
        n_pixels = 28 * 28
        n_hidden = 400
        # Encoder is built first so parameter creation order is
        # encoder -> decoder.
        self.encoder = nn.Sequential(nn.Linear(n_pixels, n_hidden), nn.Tanh())
        self.decoder = nn.Sequential(nn.Linear(n_hidden, n_pixels), nn.Sigmoid())

    def forward(self, x):
        """Return the reconstruction of `x` (encode, then decode)."""
        return self.decoder(self.encoder(x))


# Build the model and show its layer layout.
net = autoencoder()
print(net)

# Cast the parameters to float64 and place the model on the selected device
# in a single call (the training cells feed it `.double()` inputs).
net = net.to(device, dtype=torch.double)

autoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=784, out_features=400, bias=True)
    (1): Tanh()
  )
  (decoder): Sequential(
    (0): Linear(in_features=400, out_features=784, bias=True)
    (1): Sigmoid()
  )
)


**Training Autoencoder**

In [9]:
# Hyper-parameters for the reconstruction stage. `epochs`, `learning_rate`
# and `criterion` are module-level names that the next training cell reuses.
epochs = 10
learning_rate = 0.98
criterion = nn.MSELoss()  # reconstruction error between output and input

for epoch in range(epochs):  # loop over the dataset multiple times
    runningLoss = 0.0
    for inputs, _ in trainloader:  # labels are not used for reconstruction
        # Flatten every image to a 784-vector and match the float64 model.
        inputs = inputs.view(-1, 28*28).double().to(device)
        net.zero_grad()  # clear gradients left over from the previous step
        outputs = net(inputs)  # forward
        loss = criterion(outputs, inputs)  # the target is the input itself
        loss.backward()  # backpropagate the loss
        # Manual SGD step: weight = weight - learning_rate * gradient.
        # Done under no_grad so autograd does not record the in-place update.
        with torch.no_grad():
            for param in net.parameters():
                param.sub_(param.grad * learning_rate)
        runningLoss += loss.item()

    # Average over the batches actually iterated instead of assuming a
    # 60000-sample dataset.
    print('At Iteration : %d / %d  ;  Mean-Squared Error : %f'%(epoch + 1,epochs,
                                                                runningLoss/len(trainloader)))
print('Finished Training')

  cpuset_checked))


At Iteration : 1 / 10  ;  Mean-Squared Error : 0.191227
At Iteration : 2 / 10  ;  Mean-Squared Error : 0.100686
At Iteration : 3 / 10  ;  Mean-Squared Error : 0.077662
At Iteration : 4 / 10  ;  Mean-Squared Error : 0.072503
At Iteration : 5 / 10  ;  Mean-Squared Error : 0.070379
At Iteration : 6 / 10  ;  Mean-Squared Error : 0.069144
At Iteration : 7 / 10  ;  Mean-Squared Error : 0.068259
At Iteration : 8 / 10  ;  Mean-Squared Error : 0.067529
At Iteration : 9 / 10  ;  Mean-Squared Error : 0.066862
At Iteration : 10 / 10  ;  Mean-Squared Error : 0.066211
Finished Training


**Stacking Layers**

In [10]:
# Adding New Layer (Stacking)
# Two Linear+Tanh stages are appended: 400 -> 256, then 256 -> 400.
# NOTE: both are registered on net.encoder (the 256 -> 400 stage is not put
# on net.decoder), so they run before the existing 400 -> 784 decoder. The
# later classification cell drops the last child of net.encoder, which is
# this 'New_Decoder_Layer'.
stacked_layers = (
    ('New_Encoder_Layer', 400, 256),
    ('New_Decoder_Layer', 256, 400),
)
for layer_name, n_in, n_out in stacked_layers:
    net.encoder.add_module(layer_name, nn.Sequential(nn.Linear(n_in, n_out), nn.Tanh()))
print(net)
# The new layers are created as float32 on the CPU; bring them in line with
# the rest of the model.
net = net.double().to(device)

autoencoder(
  (encoder): Sequential(
    (0): Linear(in_features=784, out_features=400, bias=True)
    (1): Tanh()
    (New_Encoder_Layer): Sequential(
      (0): Linear(in_features=400, out_features=256, bias=True)
      (1): Tanh()
    )
    (New_Decoder_Layer): Sequential(
      (0): Linear(in_features=256, out_features=400, bias=True)
      (1): Tanh()
    )
  )
  (decoder): Sequential(
    (0): Linear(in_features=400, out_features=784, bias=True)
    (1): Sigmoid()
  )
)


**Training the Stacked Autoencoder**

In [11]:
# Second reconstruction stage, run after the extra layers were stacked.
# `epochs`, `learning_rate` and `criterion` are the values set in the earlier
# autoencoder training cell. Every parameter of `net` is updated here, not
# only the newly added layers.
for epoch in range(epochs):  # loop over the dataset multiple times
    runningLoss = 0.0
    for inputs, _ in trainloader:  # labels are not used for reconstruction
        # Flatten every image to a 784-vector and match the float64 model.
        inputs = inputs.view(-1, 28*28).double().to(device)
        net.zero_grad()  # clear gradients left over from the previous step
        outputs = net(inputs)  # forward
        loss = criterion(outputs, inputs)  # the target is the input itself
        loss.backward()  # backpropagate the loss
        # Manual SGD step: weight = weight - learning_rate * gradient.
        # Done under no_grad so autograd does not record the in-place update.
        with torch.no_grad():
            for param in net.parameters():
                param.sub_(param.grad * learning_rate)
        runningLoss += loss.item()

    # Average over the batches actually iterated instead of assuming a
    # 60000-sample dataset.
    print('At Iteration : %d / %d  ;  Mean-Squared Error : %f'%(epoch + 1,epochs,
                                                                runningLoss/len(trainloader)))

  cpuset_checked))


At Iteration : 1 / 10  ;  Mean-Squared Error : 0.077208
At Iteration : 2 / 10  ;  Mean-Squared Error : 0.068336
At Iteration : 3 / 10  ;  Mean-Squared Error : 0.067955
At Iteration : 4 / 10  ;  Mean-Squared Error : 0.067775
At Iteration : 5 / 10  ;  Mean-Squared Error : 0.067659
At Iteration : 6 / 10  ;  Mean-Squared Error : 0.067573
At Iteration : 7 / 10  ;  Mean-Squared Error : 0.067504
At Iteration : 8 / 10  ;  Mean-Squared Error : 0.067445
At Iteration : 9 / 10  ;  Mean-Squared Error : 0.067391
At Iteration : 10 / 10  ;  Mean-Squared Error : 0.067340


**Modifying the autoencoder for classification**

In [12]:
# Removing the decoder module from the autoencoder
# Step 1: drop the model's last top-level child and keep the first of the
# remaining ones (the encoder stack).
stacked_encoder = list(net.children())[:-1][0]
# Step 2: drop the last child of that stack as well, leaving
# Linear(784, 400) -> Tanh -> New_Encoder_Layer (400 -> 256).
kept_layers = list(stacked_encoder.children())[:-1]
new_classifier = nn.Sequential(*kept_layers)

# Adding linear layer for 10-class classification problem
# LogSoftmax output pairs with the NLLLoss used by the training cell.
classifier_head = nn.Sequential(nn.Linear(256, 10), nn.LogSoftmax(dim=1))
new_classifier.add_module('classifier', classifier_head)

net = new_classifier
print(net)
# The freshly created head is float32 on the CPU; match the rest of the model.
net = net.double().to(device)

Sequential(
  (0): Linear(in_features=784, out_features=400, bias=True)
  (1): Tanh()
  (2): Sequential(
    (0): Linear(in_features=400, out_features=256, bias=True)
    (1): Tanh()
  )
  (classifier): Sequential(
    (0): Linear(in_features=256, out_features=10, bias=True)
    (1): LogSoftmax(dim=1)
  )
)


**Training Classifier**

In [14]:
# Hyper-parameters for supervised fine-tuning of the classifier.
epochs = 20
learning_rate = 0.1
criterion = nn.NLLLoss()  # expects log-probabilities, which the LogSoftmax head produces

for epoch in range(epochs):  # loop over the dataset multiple times

    # ---- training pass ----
    net.train()  # undo the eval() call made at the end of the previous epoch
    runningLoss = 0.0
    for inputs, labels in trainloader:
        # Flatten every image to a 784-vector and match the float64 model.
        inputs, labels = inputs.view(-1, 28*28).double().to(device), labels.to(device)
        net.zero_grad()  # clear gradients left over from the previous step
        outputs = net(inputs)  # forward
        loss = criterion(outputs, labels)  # calculate loss
        loss.backward()  # backpropagate the loss
        # Manual SGD step: weight = weight - learning_rate * gradient.
        # Done under no_grad so autograd does not record the in-place update.
        with torch.no_grad():
            for param in net.parameters():
                param.sub_(param.grad * learning_rate)
        runningLoss += loss.item()

    # ---- evaluation pass on the test set ----
    # Counters are reset here, once per epoch, so they are defined even if
    # the training loader yields no batches.
    correct = 0
    total = 0
    net.eval()
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.view(-1, 28*28).double().to(device), labels.to(device)
            outputs = net(inputs)
            _, predicted = torch.max(outputs, 1)  # index of the highest log-probability
            total += labels.size(0)
            # .item() keeps `correct` a Python int, so the percentage below is
            # computed in Python floats rather than as a tensor.
            correct += (predicted == labels).sum().item()
    # Average the loss over the batches actually iterated instead of assuming
    # a 60000-sample dataset.
    print('At Iteration : %d / %d  ;  Train Error : %f  ;  Test Accuracy : %f '%(epoch + 1,epochs,
                                                                        runningLoss/len(trainloader),100 * correct /float(total)))
print('Finished Training')


  cpuset_checked))


At Iteration : 1 / 20  ;  Train Error : 0.294759  ;  Test Accuracy : 91.909996 
At Iteration : 2 / 20  ;  Train Error : 0.284303  ;  Test Accuracy : 91.860001 
At Iteration : 3 / 20  ;  Train Error : 0.276313  ;  Test Accuracy : 92.419998 
At Iteration : 4 / 20  ;  Train Error : 0.267601  ;  Test Accuracy : 92.509995 
At Iteration : 5 / 20  ;  Train Error : 0.260983  ;  Test Accuracy : 92.599998 
At Iteration : 6 / 20  ;  Train Error : 0.254230  ;  Test Accuracy : 92.919998 
At Iteration : 7 / 20  ;  Train Error : 0.248215  ;  Test Accuracy : 93.029999 
At Iteration : 8 / 20  ;  Train Error : 0.242138  ;  Test Accuracy : 93.099998 
At Iteration : 9 / 20  ;  Train Error : 0.236887  ;  Test Accuracy : 93.239998 
At Iteration : 10 / 20  ;  Train Error : 0.230308  ;  Test Accuracy : 93.449997 
At Iteration : 11 / 20  ;  Train Error : 0.224803  ;  Test Accuracy : 93.500000 
At Iteration : 12 / 20  ;  Train Error : 0.220659  ;  Test Accuracy : 93.570000 
At Iteration : 13 / 20  ;  Train Erro

**Performance of different classes**

In [15]:
# Per-class tallies, indexed by digit label (0-9).
class_correct = [0.0] * 10
class_total = [0.0] * 10

net.eval()
with torch.no_grad():
    for images, labels in testloader:
        # Flatten every image to a 784-vector and match the float64 model.
        outputs = net(images.view(-1, 28*28).double().to(device))
        _, predicted = torch.max(outputs, 1)  # index of the highest log-probability

        # `labels` stays on the CPU, so bring the predictions back before
        # comparing (a no-op when the model already runs on the CPU).
        hits = predicted.cpu() == labels

        # Walk the samples actually present in this batch rather than
        # assuming it holds exactly BatchSize items; a short final batch
        # would otherwise raise IndexError.
        for label, hit in zip(labels.tolist(), hits.tolist()):
            class_correct[label] += float(hit)
            class_total[label] += 1

for i in range(10):
    if class_total[i] == 0:
        # No test sample of this class was seen; avoid dividing by zero.
        print('Accuracy of %5s : n/a (no samples)' % classes[i])
        continue
    print('Accuracy of %5s : %f %%' % (
        classes[i], 100 * float(class_correct[i]) / float(class_total[i])))


  cpuset_checked))


Accuracy of  zero : 98.163265 %
Accuracy of   one : 97.973568 %
Accuracy of   two : 93.992248 %
Accuracy of three : 94.851485 %
Accuracy of  four : 93.279022 %
Accuracy of  five : 91.479821 %
Accuracy of   six : 95.302714 %
Accuracy of seven : 93.579767 %
Accuracy of eight : 91.786448 %
Accuracy of  nine : 92.963330 %
