# Task1: Training of the CNN

In [37]:
import torch
import torchvision as tv
from torchvision import transforms, datasets
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim 

In [38]:
# Settings
path_train='./cats_and_dogs_filtered/train/'
batch_size_train = 200 # how many samples per batch to load
num_workers_train = 0 # subprocesses to use for data loading
lr = 0.001 # TODO: learning rate
size_compressed = [80,80] # size of each image after compression


## Dataset Preprocessing

In [39]:
# Load training data

train_transform = tv.transforms.Compose([
    tv.transforms.Grayscale(num_output_channels=1), # reduce the channel Nr to one
    tv.transforms.Resize(size_compressed),
    tv.transforms.ToTensor()
])
train_data = tv.datasets.ImageFolder(root = path_train, transform = train_transform)
train_loader = torch.utils.data.DataLoader(train_data, batch_size = batch_size_train,
                                           num_workers = num_workers_train, shuffle = True)
print(train_data.class_to_idx) # return: dict with items (class_name, class_index)

{'cats': 0, 'dogs': 1}


Check some samples of our loaded training data.

In [40]:
examples = enumerate(train_loader)
batch_idx, (example_data, example_targets) = next(examples)

print('Shape of one training mini batch', example_data.shape)
print('Shape of one target mini batch', example_targets.shape)
print('Example training sample', example_data[0])
print('Target values', example_targets[:])
print(train_loader.dataset)

Shape of one training mini batch torch.Size([200, 1, 80, 80])
Shape of one target mini batch torch.Size([200])
Example training sample tensor([[[0.2941, 0.3059, 0.3176,  ..., 0.1255, 0.0471, 0.0196],
         [0.3098, 0.3216, 0.3294,  ..., 0.1647, 0.1373, 0.0549],
         [0.3176, 0.3294, 0.3373,  ..., 0.1216, 0.1569, 0.1490],
         ...,
         [0.3176, 0.3255, 0.3412,  ..., 0.3765, 0.3608, 0.3412],
         [0.3098, 0.3216, 0.3333,  ..., 0.3647, 0.3490, 0.3294],
         [0.3137, 0.3216, 0.3294,  ..., 0.3490, 0.3373, 0.3255]]])
Target values tensor([0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
        0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1,
        0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1,
        0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1,
        1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0,
        0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1

In [41]:
classes = ['cat', 'dog']
mean, std = torch.tensor([0.5]), torch.tensor([0.5])

def imshow_input(image):
    image = image.permute(1, 2, 0)
    image = torch.clamp(image,0, 1)
    plt.imshow(image.squeeze(), cmap='gray') 	

In [42]:
# get a batch of loaded training images
image_batch = enumerate(train_loader)
batch_idx, (images, labels) = next(image_batch)

print('Current index of batch:{}'.format(batch_idx))
print('Shape of each image:{}'.format(images[0].shape))

Current index of batch:0
Shape of each image:torch.Size([1, 80, 80])


## Model of CNN

In [43]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # here we use a structure  contains 2 types of neural networks, a convolution network for the feature extraction
        # and a fully connected layer to realize the classification 
        # ===========================================================================
        # define the number of filters in the CNNs for features extraction
        num_filters = [1, 64, 64, 128, 128]
        # define the number of the nodes in the fully connected layers for classification
        num_NN = [256, 64]
        # compute a convolution layer ======== Some details https://pytorch.org/docs/stable/nn.html?highlight=conv2d#torch.nn.Conv2d
        self.con1 = nn.Conv2d(in_channels = num_filters[0], 
                      out_channels = num_filters[1], 
                      kernel_size = 5, 
                      padding = 1,
                      bias = False)
        #compute amax pooling layer
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        #Batch nornamlization
        self.bn1 = nn.BatchNorm2d(num_features=num_filters[1])
        self.con2= nn.Conv2d(in_channels = num_filters[1],
                      out_channels = num_filters[2], 
                      kernel_size = 3, 
                      padding = 1,
                      bias = False)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.bn2 = nn.BatchNorm2d(num_features=num_filters[2])
        self.con3 = nn.Conv2d(in_channels = num_filters[2],
                      out_channels = num_filters[3], 
                      kernel_size = 3, 
                      padding = 1,
                      bias = False)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.bn3 = nn.BatchNorm2d(num_features=num_filters[3])
        self.con4 = nn.Conv2d(in_channels = num_filters[3],
                      out_channels = num_filters[4], 
                      kernel_size = 3, 
                      padding = 1,
                      bias = False)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.bn4 = nn.BatchNorm2d(num_features=num_filters[4])
        self.fc1 = nn.Linear(num_filters[4]*4*4,num_NN[0])
        self.fc2 = nn.Linear(num_NN[0],num_NN[1])
        self.fc3 = nn.Linear(num_NN[1],2)
    def forward(self, x):
        x = self.pool1(F.relu(self.bn1(self.con1(x))))
        x = self.pool2(F.relu(self.bn2(self.con2(x))))
        x = self.pool3(F.relu(self.bn3(self.con3(x))))
        x = self.pool4(F.relu(self.bn4(self.con4(x))))
        x = x.view(-1, 128*4*4)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

## Layout of the CNN

In [44]:
kaggle_Net = CNN()
print(kaggle_Net)
params = list(kaggle_Net.parameters())
print(params[2].shape)

CNN(
  (con1): Conv2d(1, 64, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1), bias=False)
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (con2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (con3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (con4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (pool4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (bn4): BatchNorm2d(128, eps

## Training

In [45]:
def train(NeuralNetwork,train_loader,loss_function,num_epochs, learning_rate=0.001, wd=0 ):
    """
    Trains a neural network.
    
    NeuralNetwork = neural network to be trained
    dataloader = DataLoader that deals batches for mini-batch learning
    loss_function = cost function to be optimized
    num_epochs = number of training epochs
    l_rate = learning rate (default value 0.001)
    wd = weight decay regularization (default value 0)
    """
    optimizer = torch.optim.Adam(NeuralNetwork.parameters(), lr = learning_rate, weight_decay=wd) #use SGD as the optimizer 
    for epoch in range(num_epochs):
        running_loss = 0.0
        errors = 0
        for batch_idx , data in enumerate(train_loader,0):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = NeuralNetwork(inputs)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            # error rate
            predicted = torch.max(outputs,dim=1)
            errors += sum(predicted[1] != labels)
            if (batch_idx % 100) == 0:
                print('Current loss ',running_loss/(batch_idx+1))
        print('Epoch: ',epoch+1,'Error rate on training set:', round(100.0* errors.numpy() / len(train_loader.dataset),2), '%')
    print('Finished Training')

## Train the CNN

In [46]:
train(kaggle_Net,train_loader,nn.CrossEntropyLoss(),10, lr)

Current loss  4.262293815612793
Epoch:  1 Error rate on training set: 53.7 %
Current loss  0.6462950706481934
Epoch:  2 Error rate on training set: 41.0 %
Current loss  0.6566214561462402
Epoch:  3 Error rate on training set: 33.7 %
Current loss  0.6115033626556396
Epoch:  4 Error rate on training set: 30.55 %
Current loss  0.5776083469390869
Epoch:  5 Error rate on training set: 25.7 %
Current loss  0.4659102261066437
Epoch:  6 Error rate on training set: 24.7 %
Current loss  0.390491247177124
Epoch:  7 Error rate on training set: 21.2 %
Current loss  0.4465175271034241
Epoch:  8 Error rate on training set: 16.95 %
Current loss  0.36088693141937256
Epoch:  9 Error rate on training set: 16.3 %
Current loss  0.3342365026473999
Epoch:  10 Error rate on training set: 13.2 %
Finished Training


##  Save the model

In [None]:
torch.save(kaggle_Net,'kaggle_Net.pkl') #save the parameters of te net