The first project of a neural network, whose purpose is to classify images. We use the CIFAR10 dataset. Now install all the necessary packages.

In [1]:
import os

import numpy as np
import random
from tqdm import *

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets
import torchvision.transforms as transforms
#%pip install -q torchinfo
from torchinfo import summary
import matplotlib.pyplot as plt

In [2]:
# fix the seed  

def seed_everything(seed):
    random.seed(seed) # fix the generate of random number
    os.environ['PYTHONHASHSEED'] = str(seed) # fix filling in hashes 
    np.random.seed(seed) # fix the generate of random number numpy
    torch.manual_seed(seed) # fix the generate of random number pytorch
    torch.cuda.manual_seed(seed) # fix the generate of random number for GPU
    torch.backends.cudnn.deterministic = True # choose only deterministic algoritms (for conv)
    torch.backends.cudnn.benchmark = False # fix algoritm estimation conv 

In [3]:
#class for experiment

class CFG:

  num_epochs = 10  
  train_batch_size = 32  
  test_batch_size = 512  
  num_workers = 4  # num of process for simultaneous processing data (not critical parametr to small datasets)
  lr = 3e-4 
  seed = 42  
  classes = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')  

In [4]:
def class2dict(class_data):
  return dict((name, getattr(class_data, name)) for name in dir(class_data) if not name.startswith('__'))

We implement the LeNet5 architecture. 

This type of architecture contains 8 levels, including output. It can be briefly described as follows: input -> convolutional layer with 6 feature maps -> pooling layer -> convolutional layer with 16 feature maps -> pooling layer -> then three fully connected layers -> output.

In [6]:
class LeNet5(torch.nn.Module):
    def __init__(self,
                 activation='tanh',
                 pooling='avg',
                 conv_size=5
                ):
        super(LeNet5, self).__init__()

        self.conv_size = conv_size  # the size of the convolution window

        #set the activation function to select
        if activation == 'tanh':
            activation_function = torch.nn.Tanh()
        elif activation == 'relu':
            activation_function = torch.nn.ReLU()
        else:
            raise NotImplementedError

        # set the pooling type to select
        if pooling == 'avg':
            pooling_layer = torch.nn.AvgPool2d(kernel_size=2, stride=2)
        elif pooling == 'max':
            pooling_layer = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        else:
            raise NotImplementedError

        # set the core size of the first layer
        if conv_size == 5:
            self.conv1 = torch.nn.Conv2d(in_channels=1,
                                         out_channels=6,
                                         kernel_size=5,
                                         padding=2)
        elif conv_size == 3:
            self.conv1_1 = torch.nn.Conv2d(in_channels=1,
                                         out_channels=6,
                                         kernel_size=3,
                                         padding=1)
            self.conv1_2 = torch.nn.Conv2d(in_channels=6,
                                         out_channels=6,
                                         kernel_size=3,
                                         padding=1)
        else:
            raise NotImplementedError

        self.act1 = activation_function
        self.pool1 = pooling_layer

        # set the core size of the second layer
        if conv_size == 5:
            self.conv2 = self.conv2 = torch.nn.Conv2d(in_channels=6,
                                         out_channels=16,
                                         kernel_size=5,
                                         padding=0)
        elif conv_size == 3:
            self.conv2_1 = torch.nn.Conv2d(in_channels=6,
                                         out_channels=16,
                                         kernel_size=3,
                                         padding=0)
            self.conv2_2 = torch.nn.Conv2d(in_channels=16,
                                         out_channels=16,
                                         kernel_size=3,
                                         padding=0)
        else:
            raise NotImplementedError

        self.act2 = activation_function
        self.pool2 = pooling_layer

        # fully conected layer
        self.fc1 = torch.nn.Linear(5 * 5 * 16, 120)
        self.act3 = activation_function

        self.fc2 = torch.nn.Linear(120, 84)
        self.act4 = activation_function

        self.fc3 = torch.nn.Linear(84, 10)

    # forward propagation
    def forward(self, x):
        # first layer
        if self.conv_size == 5:
            x = self.conv1(x)
        elif self.conv_size == 3:
            x = self.conv1_2(self.conv1_1(x))

        x = self.act1(x)
        x = self.pool1(x)

        # second layer
        if self.conv_size == 5:
            x = self.conv2(x)
        elif self.conv_size == 3:
            x = self.conv2_2(self.conv2_1(x))

        x = self.act2(x)
        x = self.pool2(x)

        # fully conected layers
        x = x.view(x.size(0), x.size(1) * x.size(2) * x.size(3))
        x = self.fc1(x)
        x = self.act3(x)
        x = self.fc2(x)
        x = self.act4(x)
        x = self.fc3(x)

        return x

Implementation of the function for training and testing the model.

In [8]:
def train(model, device, train_loader, optimizer, criterion, epoch):
    model.train() # putting the model into training mode
    train_loss = 0  
    correct = 0  # initialize the proportion of correct predictions of the model

    n_ex = len(train_loader)  # nums butch in train dataset 

    for batch_idx, (data, target) in tqdm(enumerate(train_loader), total=n_ex):
        data, target = data.to(device), target.to(device)  
        optimizer.zero_grad() 
        output = model(data)  # prediction of the model 
        pred = output.argmax(dim=1, keepdim=True)  # classes that the model predicted
        correct += pred.eq(target.view_as(pred)).sum().item()  # count the proportion of correct predictions of the model
        train_loss = criterion(output, target)  
        train_loss.backward() 
        optimizer.step() # updating the weights of the model

    tqdm.write('\nTrain set: Average loss: {:.4f}, Accuracy: {:.2f}%'.format(
        train_loss, 100. * correct / len(train_loader.dataset)))   

In [9]:
def test(model, device, test_loader, criterion):
    model.eval() # putting the model into testing mode
    train_loss = 0 
    correct = 0  

    with torch.no_grad(): # gradients are not counted in testing mode
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)  
            output = model(data)  
            test_loss = criterion(output, target)  
            pred = output.argmax(dim=1, keepdim=True)  
            correct += pred.eq(target.view_as(pred)).sum().item()  

    tqdm.write('Test set: Average loss: {:.4f}, Accuracy: {:.2f}%'.format(
        test_loss, 100. * correct / len(test_loader.dataset)))   

In [10]:
def main_CIFAR(model):

    use_cuda = torch.cuda.is_available()

    seed_everything(CFG.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': CFG.num_workers, 'pin_memory': True} if use_cuda else {}

    # download dataset CIFAR10
    train_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)) # normalizing the meaning
                       ])),
        batch_size=CFG.train_batch_size, shuffle=True, **kwargs)

    test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10('../data', train=False, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
                       ])),
        batch_size=CFG.test_batch_size, shuffle=False, **kwargs)

    model = model.to(device)


    optimizer = optim.Adam(model.parameters(),
                          lr=CFG.lr)

    criterion = nn.CrossEntropyLoss()

    for epoch in range(1, CFG.num_epochs + 1):
        print('\nEpoch:', epoch)
        train(model, device, train_loader, optimizer, criterion, epoch)
        test(model, device, test_loader, criterion)
    print('Training is end!')

In [11]:
# creating a convolutional network 
class CIFAR_Net(torch.nn.Module):
    def __init__(self):
        super(CIFAR_Net, self).__init__()

        self.conv1 = torch.nn.Conv2d(3, 16, 3, padding=1)
        self.act1  = torch.nn.ReLU()
        self.pool1 = torch.nn.MaxPool2d(2, 2)

        self.conv2 = torch.nn.Conv2d(16, 32, 3, padding=1)
        self.act2  = torch.nn.ReLU()
        self.pool2 = torch.nn.MaxPool2d(2, 2)

        self.conv3 = torch.nn.Conv2d(32, 64, 3, padding=1)
        self.act3  = torch.nn.ReLU()

        self.fc1   = torch.nn.Linear(8 * 8 * 64, 256)
        self.act4  = torch.nn.Tanh()

        self.fc2   = torch.nn.Linear(256, 64)
        self.act5  = torch.nn.Tanh()

        self.fc3   = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.act1(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.act2(x)
        x = self.pool2(x)

        x = self.conv3(x)
        x = self.act3(x)

        x = x.view(x.size(0), x.size(1) * x.size(2) * x.size(3))
        x = self.fc1(x)
        x = self.act4(x)
        x = self.fc2(x)
        x = self.act5(x)
        x = self.fc3(x)

        return x

In [12]:
model_CNN = CIFAR_Net()

In [13]:
summary(model=model_CNN,
        input_size=(32, 3, 32, 32), # input batch
        col_names=["input_size", "output_size", "num_params"], # what want to see
        col_width=20
)

Layer (type:depth-idx)                   Input Shape          Output Shape         Param #
CIFAR_Net                                [32, 3, 32, 32]      [32, 10]             --
├─Conv2d: 1-1                            [32, 3, 32, 32]      [32, 16, 32, 32]     448
├─ReLU: 1-2                              [32, 16, 32, 32]     [32, 16, 32, 32]     --
├─MaxPool2d: 1-3                         [32, 16, 32, 32]     [32, 16, 16, 16]     --
├─Conv2d: 1-4                            [32, 16, 16, 16]     [32, 32, 16, 16]     4,640
├─ReLU: 1-5                              [32, 32, 16, 16]     [32, 32, 16, 16]     --
├─MaxPool2d: 1-6                         [32, 32, 16, 16]     [32, 32, 8, 8]       --
├─Conv2d: 1-7                            [32, 32, 8, 8]       [32, 64, 8, 8]       18,496
├─ReLU: 1-8                              [32, 64, 8, 8]       [32, 64, 8, 8]       --
├─Linear: 1-9                            [32, 4096]           [32, 256]            1,048,832
├─Tanh: 1-10                      

In [14]:
main_CIFAR(model_CNN)

Files already downloaded and verified

Epoch: 1


100%|██████████| 1563/1563 [01:33<00:00, 16.73it/s]



Train set: Average loss: 1.4950, Accuracy: 48.81%
Test set: Average loss: 1.2161, Accuracy: 59.33%

Epoch: 2


100%|██████████| 1563/1563 [01:33<00:00, 16.81it/s]



Train set: Average loss: 1.1561, Accuracy: 63.10%
Test set: Average loss: 1.1204, Accuracy: 64.10%

Epoch: 3


100%|██████████| 1563/1563 [01:32<00:00, 16.81it/s]



Train set: Average loss: 0.7673, Accuracy: 69.16%
Test set: Average loss: 1.0160, Accuracy: 67.78%

Epoch: 4


100%|██████████| 1563/1563 [01:29<00:00, 17.43it/s]



Train set: Average loss: 0.7138, Accuracy: 73.12%
Test set: Average loss: 0.9874, Accuracy: 69.43%

Epoch: 5


100%|██████████| 1563/1563 [01:29<00:00, 17.46it/s]



Train set: Average loss: 0.8714, Accuracy: 76.48%
Test set: Average loss: 1.0616, Accuracy: 69.61%

Epoch: 6


100%|██████████| 1563/1563 [01:30<00:00, 17.29it/s]



Train set: Average loss: 0.4142, Accuracy: 79.85%
Test set: Average loss: 0.9528, Accuracy: 71.85%

Epoch: 7


100%|██████████| 1563/1563 [01:29<00:00, 17.45it/s]



Train set: Average loss: 0.3773, Accuracy: 83.14%
Test set: Average loss: 0.9431, Accuracy: 71.55%

Epoch: 8


100%|██████████| 1563/1563 [01:29<00:00, 17.47it/s]



Train set: Average loss: 0.4931, Accuracy: 86.55%
Test set: Average loss: 1.0296, Accuracy: 72.09%

Epoch: 9


100%|██████████| 1563/1563 [01:29<00:00, 17.51it/s]



Train set: Average loss: 0.3742, Accuracy: 89.82%
Test set: Average loss: 1.0832, Accuracy: 72.16%

Epoch: 10


100%|██████████| 1563/1563 [01:29<00:00, 17.54it/s]



Train set: Average loss: 0.1419, Accuracy: 92.78%
Test set: Average loss: 1.0921, Accuracy: 72.10%
Training is end!


In [15]:
from PIL import Image

In [36]:
def test_check(model, device):
    model.eval() 
    for i in range(1, 11):
        data = Image.open(f"img{i}.jpg") 
        data = data.resize((32,32),Image.ANTIALIAS)
        transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))])
        data_transf = transform(data)

        classes = ('airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
        
        data_transf.unsqueeze_(0)
        with torch.no_grad(): 
            data_transf = data_transf.to(device)
            output = model(data_transf)
            pred = output.argmax(dim=1, keepdim=True)
            print(classes[pred.item()])    



In [37]:
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
test_check(model_CNN, device)

bird
airplane
automobile
dog
deer
dog
airplane
horse
ship
truck
