<a href="https://colab.research.google.com/github/Volkner90/School/blob/main/MNIST_Pytorch_CNN_v03.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch  # this is the baseline functionality of Pytorch
import torch.nn as nn

from torchvision import datasets # a module that let's you load popular datasets 
                                 # for training a neural network

import torchvision.transforms as transforms  # The data in datasets is not 
                                  # always in the format we need. This module 
                                  #let's you easily modify 

import torchvision.utils as vision_utils  # submodule with useful functions for
                                  # handling images

import torch.optim as optim  # submodule wiht different otimization (or learning)
                            # algorithms

import torch.nn.functional as F

import torch.utils as utils

In [3]:
# Each sample in the dataset is a tuple (image:PIL[1,28,28],label:int)
train_ds = datasets.MNIST('../data', train=True, download=True)
test_ds = datasets.MNIST('../data', train=False, download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



In [4]:
transform = transforms.ToTensor()  # converts anything into a Tensor

train_ds.transform = transform
test_ds.transform = transform


In [5]:
batch_size=32  # numer of elements to retrieve at once from the dataset

train_loader = utils.data.DataLoader(train_ds, # dataset to get the data from
                                    batch_size=batch_size,
                                    shuffle=True) 
# note that you can use multi-line statements in python

test_loader = utils.data.DataLoader(test_ds,
                                   batch_size=batch_size,
                                   shuffle=True)


# Topology
### Convolutional Layers
1. nn.Conv2d(in_channels=1,  n_filters_1, kernel_size),
2. nn.Conv2d(n_filters_1, n_filters_2, kernel_size)
3. nn.MaxPool2d(pool_kernel_size)


### Classification (linear) layers
4. nn.Flatten(start_dim=1)
5. nn.Linear(n_pixels, N1)
6. nn.Linear(N1, 10)
   
Note: 
We need to compute the size at the output of MaxPool to be able to define the number of inputs to the linear layers.

In [6]:
def conv_output_size(L_in, k, s=1, p=0, d=1):
  return (L_in +2*p - d*(k -1) -1)/s + 1

In [7]:
L_in = 28  # input size
n_filters_1 = 8
n_filters_2 = 16
k = 3 #tuple if not square matrix
pool_k = 2

N1 = 128  # neurons at linear layers


In [8]:
L_out1 = conv_output_size(L_in,k)
L_out2 = conv_output_size(L_out1,k)
L_out3 = conv_output_size(L_out2,k=pool_k,s=pool_k)
print(L_out1,L_out2, L_out3)


n_pixels = int(n_filters_2 * L_out3**2)
print(n_pixels)



26.0 24.0 12.0
2304


In [9]:
net = torch.nn.Sequential(
            nn.Conv2d(in_channels=1,  out_channels=n_filters_1, kernel_size=k),
            nn.Conv2d(in_channels=n_filters_1, out_channels=n_filters_2, kernel_size=k),
            nn.MaxPool2d(kernel_size=pool_k),
            nn.Flatten(start_dim=1),
            nn.Linear(n_pixels, N1),
            nn.Linear(N1, 10)
        )

In [10]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001)

n_epochs = 2  # number of epochs we want to train
for epoch in range(n_epochs):  # range(int_x) converts an integer into a list from 0 to int_x
    for batch_idx, data in enumerate(train_loader):  # consume the dataset, one batch at a time
        # get the inputs; data is a tuple of (inputs, labels)
        inputs, labels = data

        # zero the parameter gradients to get the gradient per batch
        optimizer.zero_grad() 

        # do a forward pass
        outputs = net(inputs)

        # compute the loss
        loss = criterion(outputs, labels)

        # do the backpropagation
        loss.backward()

        # Let the parameters with the optimizer
        optimizer.step()

        # print statistics
        if batch_idx % 100 == 0:    # print every 100 mini-batches
            print(f'[{epoch}, {batch_idx:5d}] loss: {loss.item():.3f}')

print('Finished Training')

[0,     0] loss: 2.304
[0,   100] loss: 2.298
[0,   200] loss: 2.269
[0,   300] loss: 2.240
[0,   400] loss: 2.243
[0,   500] loss: 2.221
[0,   600] loss: 2.197
[0,   700] loss: 2.152
[0,   800] loss: 2.089
[0,   900] loss: 2.021
[0,  1000] loss: 1.883
[0,  1100] loss: 1.793
[0,  1200] loss: 1.565
[0,  1300] loss: 1.516
[0,  1400] loss: 1.269
[0,  1500] loss: 1.226
[0,  1600] loss: 0.752
[0,  1700] loss: 0.916
[0,  1800] loss: 0.504
[1,     0] loss: 0.664
[1,   100] loss: 0.506
[1,   200] loss: 0.659
[1,   300] loss: 0.512
[1,   400] loss: 0.847
[1,   500] loss: 0.518
[1,   600] loss: 0.499
[1,   700] loss: 0.329
[1,   800] loss: 0.309
[1,   900] loss: 0.399
[1,  1000] loss: 0.484
[1,  1100] loss: 0.485
[1,  1200] loss: 0.231
[1,  1300] loss: 0.385
[1,  1400] loss: 0.267
[1,  1500] loss: 0.319
[1,  1600] loss: 0.247
[1,  1700] loss: 0.340
[1,  1800] loss: 0.312
Finished Training


In [11]:
n_samples = len(test_ds)
n_batches = len(test_loader)
test_loss = 0
n_hits = 0

with torch.no_grad():  # disable the AutoDiff record. Mind the indentation!
  for inputs,labels in test_loader: # python unpacks each tuple into inputs and labels
    # do a forward pass on this batch to get the outputs
    net_output = net(inputs)  # output is 10 values. The largest indicates the
                              # predicted category
    
    # To compute the accuracy, need to compute how many categories we got right
    # first, get the maximum for each prediction
    predicted_digits = torch.argmax(net_output, dim=1)  # dim=1 to do per image

    # compare the prediction with the labels
    current_hits = predicted_digits == labels  # == results in 0's or 1's 

    # add all the 1's because they mean we got those right 
    current_hits = torch.sum(current_hits)

    # accumulate our hits with those of the other batches
    n_hits = n_hits + current_hits

# normalize the accuracy 
Accuracy = n_hits / n_samples
print(f'Accuracy: {(100*Accuracy):.2f}%')



Accuracy: 89.64%
