<a href="https://colab.research.google.com/github/jelegend/ANN-MLsummerproject/blob/master/MNIST_Classification_Model_%5BGPU%5D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.nn import functional
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import time
from IPython import display

In [0]:
# grayscale and inline plotting
%matplotlib inline
plt.rcParams['image.cmap'] = 'gray'

In [0]:
def plot_image(image):
    nr, nc = image.shape
    extent = [-0.5, nc - 0.5, nr - 0.5, -0.5]
    plt.imshow(image, extent=extent, origin='upper', interpolation='nearest')

def visualize(t, loss, errcl, x5, x4, x3, x0, w1):

    loss_avg = np.divide(
        np.cumsum(loss[: t + 1]),
        range(1, t + 2)
    )

    errcl_avg = np.divide(
        np.cumsum(errcl[: t + 1]),
        range(1, t + 2)
    )

    n_last_batches = np.min([20, t])
    k = np.ones(n_last_batches * 2 + 1) / (n_last_batches + 1)
    k[:n_last_batches] = 0
    
    errcl_sm = np.convolve(np.pad(errcl, mode="edge", pad_width=n_last_batches), k, mode="valid")
    errcl_sm = errcl_sm[: len(errcl_avg)]

    loss_sm = np.convolve(np.pad(loss, mode="edge", pad_width=n_last_batches), k, mode="valid")
    loss_sm = loss_sm[: len(loss)]

    display.clear_output(wait=True)

    plt.subplot(4, 3, 1)
    plt.plot(loss, label="loss")
    plt.plot(loss_sm, label="smothed loss")
    plt.plot(loss_avg, label="avg loss")
    plt.legend()
    plt.ylim(0, np.max(loss)*1.05)
    plt.title("loss: avg - %.4f, smoothed - %.4f, current - %.4f"  % (loss_avg[t], loss_sm[t], loss[t]))

    plt.subplot(4, 3, 2)
    plt.plot(errcl, label="cl err")
    plt.plot(errcl_sm, label="smothed cl err")
    plt.plot(errcl_avg, label="avg cl err")
    plt.legend()
    plt.ylim(0, np.max(errcl)*1.05)
    plt.title("cl error: avg - %.4f, smoothed - %.4f, current - %.4f"  % (errcl_avg[t], errcl_sm[t], errcl[t]))
    
    plt.subplot(4, 3, 3)
    plt.bar(range(len(x5)), x5)
    plt.title("class confidences")

    plt.subplot(4,3,4)
    plt.hist(x4)
    plt.title("F6 activations")

    plt.subplot(4,3,5)
    plt.hist(x3)
    plt.title("C5 activations")

    plt.subplot(4,3,6)
    plot_image(x0)
    plt.title("input image")

    for i in range(w1.shape[0]):
        plt.subplot(4,3,7+i)
        plot_image(w1[i])
        plt.title("C1 kernel channel " + str(i))

    plt.subplots_adjust(wspace=0.5)
    plt.subplots_adjust(hspace=0.5)
    plt.gcf().set_size_inches(18.5, 10.5)
    display.display(plt.gcf())

In [4]:
mnist = torchvision.datasets.MNIST(root='data', train=True, download=True) # train data only
trainimages = mnist.data
trainlabels = mnist.targets

# check training data shape
print ("Training Data shape is: ", list(trainimages.size()))
print ("Training Target shape is: ", list(trainlabels.size()))

Training Data shape is:  [60000, 28, 28]
Training Target shape is:  [60000]


In [0]:
class Network(nn.Module):
    
    def __init__(self):
        super(Network, self).__init__()
        # convolutional layer
        self.conv1 = nn.Conv2d(in_channels=1,out_channels=32, kernel_size=(3,3), stride=1)
        # convolutional layer
        self.conv2 = nn.Conv2d(in_channels=32,out_channels=64, kernel_size=(3,3), stride=1)
        # max pooling layer
        self.pool = nn.MaxPool2d(kernel_size = (2,2),stride=2)
        # dropout layer 1 (p=0.25)
        self.dropout1 = nn.Dropout(0.25)
        # linear layer (9216 -> 128)
        self.fc1 = nn.Linear(9216, 128)
        # dropout layer 2 (p=0.5)
        self.dropout2 = nn.Dropout(0.5)
        # linear layer (500 -> 10)
        self.fc2 = nn.Linear(128, 10)

        
    def forward(self, x):
       # add sequence of convolutional and max pooling layers
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = self.pool(x)
        #Flattening the output for the Linear layers
        x = x.view(x.size(0),-1) 
        #Dropout Layer
        x = self.dropout1(x)
        #Linear fully-connected layer and ReLU
        x = torch.relu(self.fc1(x))
        #Dropout Layer
        x = self.dropout2(x)
        #Linear fully-connected layer and Softmax
        x = torch.softmax(self.fc2(x), dim=1)
        
        return x

In [6]:
ntrain = trainimages.shape[0];  # number of training examples
nepoch = 10;                    # number of epochs through training set
disp_freq = 100                 # display frequency
batchsize = 32                  # minibatch size

errs = []
losses = []

net = Network()

# use SGD optimizer, set learning rate parameter as 0.001
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum = 0.9)

t_start = time.time()
for iepoch in range(nepoch):
    for t in range(int(ntrain / batchsize)):
        batchindices = np.random.choice(ntrain, batchsize, replace=False)
        trainlabels_iter = trainlabels[batchindices]
        
        # label 1 for the correct digit and -1 for the incorrect digits
        y = torch.ones(10, batchsize) * (-1)
        y[trainlabels_iter, torch.arange(batchsize, dtype=torch.int64)] = 1

        # normalize input images
        imgs = torch.zeros([batchsize, 1, 28, 28])
        imgs[:, 0, 2: -2, 2: -2] = trainimages[batchindices].float() / 255.

        # before the forward pass, clean the gradient buffers of all parameters
        optimizer.zero_grad()

        # forward pass
        out = net(imgs)
        
        # MSE loss
        loss = criterion(out, y)

        # backward pass
        loss.backward()

        # update parameters using SGD
        optimizer.step()

        # calculate error rate and loss for plot
        pred = torch.argmax(out, dim=1)
        err = torch.mean((pred != trainlabels_iter).float())
        errs.append(err.detach().numpy())
        losses.append(loss.detach().numpy())

        
        # plots
        if (t + 1) % disp_freq == 0:
            plt.gcf().clear()
            visualize(len(errs) - 1, losses, errs, out[0,:].detach(), lenet5.record["F6"][:, 0], 
                      lenet5.record["C5"][:, 0], imgs[0, 0].detach(), lenet5.C1.weight.detach().squeeze())
            print(str(time.time() - t_start) + " seconds per " + str(disp_freq) + " iterations")
            t_start = time.time()
            time.sleep(0.01)


RuntimeError: ignored