<a href="https://colab.research.google.com/github/dltnqls9788/Pytorch/blob/main/MNIST_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import libraries 

In [1]:
import numpy as np 

import torch 
import torch.nn as nn 
import torch.optim as optim 
import torch.nn.init as init

import torchvision.datasets as dset 
import torchvision.transforms as transforms

from torch.utils.data import DataLoader
from torch.autograd import Variable

## Set hyperparameters

In [2]:
batch_size = 16 
learning_rate = 0.0002
num_epoch = 10

## Download MNIST dataset

In [3]:
mnist_train = dset.MNIST("./", train=True, transform=transforms.ToTensor(), target_transform=None, download=True)
mnist_test = dset.MNIST("./", train=False, transform=transforms.ToTensor(), target_transform=None, download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw



## Set dataloader

In [4]:
train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=2, drop_last=True)
test_loader = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=2, drop_last=True)

## Define CNN model

In [5]:
## acc = 98.11
class CNN(nn.Module):
  def __init__(self):
    super(CNN,self).__init__()
    self.layer = nn.Sequential(
        nn.Conv2d(1, 16, kernel_size=5),
        nn.ReLU(),

        nn.Conv2d(16, 32, kernel_size=5, padding="same"),
        nn.BatchNorm2d(32),
        nn.ReLU(),
        nn.MaxPool2d(2,2),
        
        nn.Conv2d(32, 64, kernel_size=5, padding="same"),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(2,2),
    )

    # Fully Connected layers(MLP)
    self.fc_layer = nn.Sequential(
        nn.Linear(6*6*64, 100), 
        nn.ReLU(),
        nn.Linear(100,10)     
    )
        
  def forward(self, x):
    out = self.layer(x) # Convolutional layers
    out = out.view(batch_size, -1) # Flatting layer
    out = self.fc_layer(out) # Fully connected layers

    return out # output features

model = CNN().cuda()

In [None]:
model

CNN(
  (layer): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=same)
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=same)
    (8): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU()
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layer): Sequential(
    (0): Linear(in_features=2304, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=10, bias=True)
  )
)

## Define loss function and optimizer

In [None]:
loss_func = nn.CrossEntropyLoss() 
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

## Train CNN model

In [None]:
for i in range(num_epoch):
  for j, [image, label] in enumerate(train_loader):
    x = Variable(image).cuda()
    y_= Variable(label).cuda()   
    optimizer.zero_grad()  # Initialize gradients before backpropagation
    
    output = model.forward(x) # Get CNN model's outputs
    loss = loss_func(output, y_) # Calculate the cross-entropy loss
    loss.backward()  # Calculate gradients from the loss
    
    optimizer.step()  #Backpropagate the gradients to train CNN model 

    if j % 1000 == 0:
      print(loss)

tensor(2.4187, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.3993, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(1.1913, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.5446, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.6997, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.2385, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.3108, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.4667, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.2963, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1394, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.3547, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1355, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.3182, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1641, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.1542, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.2674, device='cuda:0', grad_fn=<NllLossBackward0>)
tensor(0.0483, device='cuda:0', grad_fn=

## Test CNN model

In [None]:
correct = 0
total = 0
for image,label in test_loader:
    x = Variable(image,volatile=True).cuda()
    y_= Variable(label).cuda()
    output = model.forward(x)
    _,output_index = torch.max(output,1) # Select the digits with the highest output scores (0-9)
    total += label.size(0) # Counting the total number of test data
    correct += (output_index == y_).sum().float() # Counting the number of correctly classified data
print("Accuracy of Test Data: {}".format(100*correct/total))

  after removing the cwd from sys.path.


Accuracy of Test Data: 98.11000061035156
