<a href="https://colab.research.google.com/github/ayyucedemirbas/machine_learning_algorithms/blob/master/PyTorch_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torchvision import datasets
import torchvision.transforms as transforms

In [2]:
transform = transforms.ToTensor()

train_data = datasets.MNIST(root='data', train=True,
                                   download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False,
                                  download=True, transform=transform)

In [3]:
num_train = len(train_data)
print('num_train = len(train_data) ==> ', num_train)
indices = list(range(num_train))
print('len(indices) ==>', len(indices))

num_train = len(train_data) ==>  60000
len(indices) ==> 60000


In [4]:
import numpy as np

In [5]:
np.random.shuffle(indices)

In [6]:
#split the fucking dataset
valid_size = 0.2
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

print('len(train_idx) ==> ', len(train_idx))
print('len(valid_idx) ==> ', len(valid_idx))

len(train_idx) ==>  48000
len(valid_idx) ==>  12000


In [7]:
from torch.utils.data.sampler import SubsetRandomSampler

In [8]:
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

num_workers = 0
batch_size = 16

train_loader = torch.utils.data.DataLoader(dataset=train_data, \
                                           batch_size=batch_size, \
                                           sampler=train_sampler, \
                                           num_workers=num_workers)

valid_loader = torch.utils.data.DataLoader(dataset=train_data, \
                                           batch_size=batch_size, \
                                           sampler=valid_sampler, \
                                           num_workers=num_workers)

test_loader = torch.utils.data.DataLoader(dataset=test_data, \
                                          batch_size=batch_size, \
                                          num_workers=num_workers)

In [9]:
dataiter = iter(train_loader)
images, labels = dataiter.next()
print(len(images), len(labels))
print('Correct Labels: ', labels)
images = images.numpy()
print('Shape of our images tensor =', images.shape)
print('Batch Size =', images.shape[0], 'Image Height/Width =', \
                                                        images.shape[2])

print()
print('Squeezing the images tensor =', np.squeeze(images).shape)
print('Un-squeezing the images tensor (axis=3) =', \
                                        np.expand_dims(images, axis=3).shape)

16 16
Correct Labels:  tensor([7, 8, 4, 1, 4, 2, 7, 1, 0, 1, 8, 2, 9, 9, 4, 6])
Shape of our images tensor = (16, 1, 28, 28)
Batch Size = 16 Image Height/Width = 28

Squeezing the images tensor = (16, 28, 28)
Un-squeezing the images tensor (axis=3) = (16, 1, 28, 1, 28)


In [10]:
import torch.nn as nn
import torch.nn.functional as F
class MNISTModel(nn.Module):
    def __init__(self):
        super(MNISTModel, self).__init__()
        
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3, \
                               stride=1, padding=1) 
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, \
                               stride=1, padding=1)
       
        self.linear1 = nn.Linear(in_features=12544, out_features=256)
        self.linear2 = nn.Linear(in_features=256, out_features=64)
        self.linear3 = nn.Linear(in_features=64, out_features=10)

        self.dropout = nn.Dropout(p=0.25)

    def forward(self, image_batch):
        image_batch = F.relu(input=self.conv1(image_batch))          
        image_batch = F.relu(input=self.conv2(image_batch))  
        
        flat_image_batch = image_batch.view(image_batch.shape[0], -1) # Flatten MNIST images into a 784 long vector
        flat_image_batch = F.relu(input=self.linear1(flat_image_batch))
        flat_image_batch = self.dropout(F.relu(input=self.linear2(flat_image_batch)))
        flat_image_batch = F.relu(input=self.linear3(flat_image_batch))
        return flat_image_batch               

In [11]:
from torchsummary import summary 

In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 
print('The model will run on', device)

mymodel = MNISTModel().to(device)
summary(model=mymodel, input_size=(1, 28, 28), batch_size=16)

The model will run on cpu
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [16, 8, 28, 28]              80
            Conv2d-2           [16, 16, 28, 28]           1,168
            Linear-3                  [16, 256]       3,211,520
            Linear-4                   [16, 64]          16,448
           Dropout-5                   [16, 64]               0
            Linear-6                   [16, 10]             650
Total params: 3,229,866
Trainable params: 3,229,866
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.05
Forward/backward pass size (MB): 2.34
Params size (MB): 12.32
Estimated Total Size (MB): 14.71
----------------------------------------------------------------


In [13]:
modelsum= MNISTModel()
print(modelsum)

MNISTModel(
  (conv1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (linear1): Linear(in_features=12544, out_features=256, bias=True)
  (linear2): Linear(in_features=256, out_features=64, bias=True)
  (linear3): Linear(in_features=64, out_features=10, bias=True)
  (dropout): Dropout(p=0.25, inplace=False)
)


In [14]:
import torch.optim as optim

In [15]:
optimizer = optim.SGD(mymodel.parameters(), lr=0.003, momentum=0.9)

In [16]:
criterion = nn.CrossEntropyLoss()      

In [17]:
print('Training started')
for epoch in range(2):  # loop over the dataset multiple times
    print('Epoch: ',epoch)

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        # get the inputs; data is a list of [images, labels]
        images, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = mymodel(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

Training started
Epoch:  0
[1,  2000] loss: 0.654
Epoch:  1
[2,  2000] loss: 0.145
Finished Training


In [18]:
testiter = iter(test_loader)

In [19]:
images, labels = dataiter.next()
outputs = mymodel(images)
_, predicted = torch.max(outputs, 1)
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = mymodel(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 96 %


In [20]:
classes = ('zero', 'one', 'two', 'three',
           'four', 'five', 'six', 'seven', 'eight', 'nine')

In [21]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = mymodel(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of  zero : 97 %
Accuracy of   one : 98 %
Accuracy of   two : 95 %
Accuracy of three : 96 %
Accuracy of  four : 95 %
Accuracy of  five : 93 %
Accuracy of   six : 98 %
Accuracy of seven : 95 %
Accuracy of eight : 91 %
Accuracy of  nine : 95 %
