### Some preparations before we begin (or begging...)

In [53]:
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118


import torch
from torch import nn
import torchvision
import os
import pandas as pd
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
import numpy as np

print(torch.cuda.is_available())

def collate_fn(batch):
    images, labels = [], []

    for item in batch:
        images.append(item[0])
        labels.append(item[1])
    images = np.stack(images)
    images = torch.from_numpy(images).float()
    labels = torch.tensor(labels)
    return images, labels


# some config vars
batch_size = 64 
num_classes = 10
epochs = 20
learning_rate = 0.0001
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# sample data
need_download = not os.path.exists('data')
train = torchvision.datasets.CIFAR10(root='data', train=True, download=need_download, transform=ToTensor())
dataset = DataLoader(train, batch_size=batch_size, shuffle=True)
test = torchvision.datasets.CIFAR10(root='data', train=False, download=need_download, transform=ToTensor())
dataset_test = DataLoader(test, batch_size=batch_size, shuffle=True)


(train, dataset)

True


(Dataset CIFAR10
     Number of datapoints: 50000
     Root location: data
     Split: Train
     StandardTransform
 Transform: ToTensor(),
 <torch.utils.data.dataloader.DataLoader at 0x28afc4e0820>)

In [54]:
import tqdm.notebook as tqdm
# simple function for training and calculating loss
def basic_training(model, allow_print=True) -> nn.Module:
    if allow_print:
        print(f"{model.__class__.__name__} is training")

    model.to(device)
    opt = torch.optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()

    for epoch, _ in enumerate(tqdm.tqdm(range(epochs))):
        model.train()
        for batch in dataset:
            data, target = batch
            data, target = data.to(device), target.to(device)
            opt.zero_grad()
            output = model(data)
            loss = loss_fn(output, target)
            loss.backward()
            opt.step()

        if allow_print:
            print(f"Epoch: {epoch}, Loss: {loss.item()}")
        
    if allow_print:
        print("Training done")
    
    return model


In [55]:
# simple function for testing
def basic_testing(model, allow_print=True) -> float:
    if allow_print:
        print(f"{model.__class__.__name__} is testing")

    model.to(device)
    model.eval()
    correct = 0
    with torch.no_grad():
        for batch in dataset_test:
            data, target = batch
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    if allow_print:
        print(f"Accuracy: {correct / len(test) * 100}%")
    
    return correct / len(test)

### Model with only fully connected layers

In [57]:
# make a model only with fully connected layers
class DenseModel(torch.nn.Module):
    def __init__(self):
        super(DenseModel, self).__init__()

        # that piece of crap almost ruined my day
        IMG_SIZE = 32 * 32 * 3

        self.fc1 = nn.Linear(IMG_SIZE, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        
    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = torch.selu(self.fc2(x))
        x = self.fc3(x)
        return x


model = DenseModel()
# model
model = basic_training(model)
basic_testing(model)

DenseModel is training


  0%|          | 0/20 [00:00<?, ?it/s]

Epoch: 0, Loss: 1.6972705125808716
Epoch: 1, Loss: 1.9705039262771606
Epoch: 2, Loss: 1.8144596815109253
Epoch: 3, Loss: 1.3507314920425415
Epoch: 4, Loss: 1.3221155405044556
Epoch: 5, Loss: 1.5546053647994995
Epoch: 6, Loss: 1.3810145854949951
Epoch: 7, Loss: 1.1232808828353882
Epoch: 8, Loss: 1.701409935951233
Epoch: 9, Loss: 1.3187580108642578
Epoch: 10, Loss: 1.2564128637313843
Epoch: 11, Loss: 1.3767695426940918
Epoch: 12, Loss: 1.0384756326675415
Epoch: 13, Loss: 1.0614036321640015
Epoch: 14, Loss: 1.2896673679351807
Epoch: 15, Loss: 1.6821651458740234
Epoch: 16, Loss: 1.0048247575759888
Epoch: 17, Loss: 1.274583101272583
Epoch: 18, Loss: 1.8488388061523438
Epoch: 19, Loss: 1.522212028503418
Training done
DenseModel is testing
Accuracy: 49.82%


0.4982

In [58]:
class DenseModel2(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, (3, 3)),
            nn.SELU(),
            nn.Conv2d(32, 64, (3, 3)),
            nn.ReLU(),
            nn.Conv2d(64, 64, (3,3)),
            nn.ELU(),
            nn.Flatten(),
            nn.Linear(64*(32-6)*(32-6), 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.SELU(),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        return self.model(x)

model = DenseModel2()
basic_training(model)
test_model(model)

DenseModel2 is training


  0%|          | 0/20 [00:00<?, ?it/s]

Epoch: 0, Loss: 1.0359292030334473
Epoch: 1, Loss: 1.2979050874710083
Epoch: 2, Loss: 1.3302175998687744
Epoch: 3, Loss: 1.0358209609985352
Epoch: 4, Loss: 0.7818771004676819
Epoch: 5, Loss: 0.6878566145896912
Epoch: 6, Loss: 1.0331647396087646
Epoch: 7, Loss: 0.2888738214969635
Epoch: 8, Loss: 0.2849367558956146
Epoch: 9, Loss: 0.6591193079948425
Epoch: 10, Loss: 0.1849045306444168
Epoch: 11, Loss: 0.5032113194465637
Epoch: 12, Loss: 0.5448060035705566
Epoch: 13, Loss: 0.13271263241767883
Epoch: 14, Loss: 0.4024338126182556
Epoch: 15, Loss: 0.4045252501964569
Epoch: 16, Loss: 0.0171759482473135
Epoch: 17, Loss: 0.015329532325267792
Epoch: 18, Loss: 0.06224741041660309
Epoch: 19, Loss: 0.047683920711278915
Training done
Accuracy: 65.49% 6549/10000


In [34]:
class TestModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, (3, 3)),
            nn.SELU(),
            nn.Conv2d(32, 64, (3, 3)),
            nn.ReLU(),
            nn.Conv2d(64, 64, (3,3)),
            nn.ELU(),
            nn.Flatten(),
            nn.Linear(64*(32-6)*(32-6), 10),
        )

    def forward(self, x):
        return self.model(x)

model = TestModel()
basic_training(model)
test_model(model)

TestModel is training
Epoch: 0, Loss: 1.812021255493164
Epoch: 1, Loss: 1.6520737409591675
Epoch: 2, Loss: 1.5422548055648804
Epoch: 3, Loss: 1.4548585414886475
Epoch: 4, Loss: 1.411234736442566
Epoch: 5, Loss: 1.3604387044906616
Epoch: 6, Loss: 1.3752096891403198
Epoch: 7, Loss: 1.2885619401931763
Epoch: 8, Loss: 1.3203836679458618
Epoch: 9, Loss: 1.2560139894485474
Training done
Accuracy: 54.69% 5469/10000


### Model with convolutional layers

In [36]:
# adding convolutional layers to the model
class ModelConvolutionalLayers(torch.nn.Module):
    def __init__(self):
        super(ModelConvolutionalLayers, self).__init__()

        # if I write it with my hands, it doen't work, but when whappening in sequential, it works
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Linear(4 * 4 * 128, num_classes)
        )

    def forward(self, x):
        return self.model(x)
model = ModelConvolutionalLayers()
basic_training(model)
test_model(model)


ModelConvolutionalLayers is training
Epoch: 0, Loss: 2.232653856277466
Epoch: 1, Loss: 1.9715029001235962
Epoch: 2, Loss: 1.7839516401290894
Epoch: 3, Loss: 1.688223123550415
Epoch: 4, Loss: 1.6988224983215332
Epoch: 5, Loss: 1.6335655450820923
Epoch: 6, Loss: 1.5986485481262207
Epoch: 7, Loss: 1.5196000337600708
Epoch: 8, Loss: 1.5179378986358643
Epoch: 9, Loss: 1.4245572090148926
Training done
Accuracy: 47.05% 4705/10000


### Comparing different number of layers

In [124]:
# model with only 1 layer
class ModelSmallAmountLayers(torch.nn.Module):
    def __init__(self):
        super(ModelSmallAmountLayers, self).__init__()

        self.fc = nn.Linear(32 * 32 * 3, num_classes)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
"""
model = ModelSmallAmountLayers()
print("ModelSmallAmountLayers is training")
basic_training(model, allow_print=False)
print("accuracy of model with only 1 layer")
test_model(model)

model = ModelFullyConnected()
print("ModelFullyConnected is training")
basic_training(model, allow_print=False)
print("accuracy of model with fully connected layers")
test_model(model)
"""

ModelSmallAmountLayers is training
accuracy of model with only 1 layer
Accuracy: 28.84% 2884/10000
ModelFullyConnected is training
accuracy of model with fully connected layers
Accuracy: 33.62% 3362/10000


Let's pretend that ModelConvolutionalLayers in model with big amount of layers and compare them together
as we can see from the output the model with only 1 layer is shifting much more than the model with 4 layers
also we can see that the accuracy of such model is lower

to be shorter, the more layers neural network has, the more complex objects it can detect, but the time (or resurses) for it's 
training would be increased

### Dropout

In [140]:
# adding dropout layer

global epochs
epochs = 20

class ModelDropout(torch.nn.Module):

    def __init__(self):
        super(ModelDropout, self).__init__()

        self.model = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(0.2),
            nn.Flatten(),
            nn.Linear(4 * 4 * 128, 128),
            nn.ReLU(),
        )

        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.model(x)
        return x

        

model = ModelDropout()
model = basic_training(model)
print("Accuracy with dropout")
test_model(model)

model = ModelConvolutionalLayers()
model = basic_training(model)
test_model(model)

epochs = 10

ModelDropout is training
Epoch: 0, Loss: 2.199138879776001
Epoch: 1, Loss: 2.0977420806884766
Epoch: 2, Loss: 1.4756028652191162
Epoch: 3, Loss: 1.3236998319625854
Epoch: 4, Loss: 2.085686445236206
Epoch: 5, Loss: 1.5807256698608398
Epoch: 6, Loss: 1.7575302124023438
Epoch: 7, Loss: 1.5117683410644531
Epoch: 8, Loss: 1.4893335103988647
Epoch: 9, Loss: 1.7689285278320312
Epoch: 10, Loss: 1.8285218477249146
Epoch: 11, Loss: 1.6035689115524292
Epoch: 12, Loss: 1.658751368522644
Epoch: 13, Loss: 1.3808714151382446
Epoch: 14, Loss: 1.8599021434783936
Epoch: 15, Loss: 1.5263110399246216
Epoch: 16, Loss: 1.829563856124878
Epoch: 17, Loss: 1.935354471206665
Epoch: 18, Loss: 1.7004787921905518
Epoch: 19, Loss: 1.7640712261199951
Training done
Accuracy with dropout
Accuracy: 57.07% 5707/10000
ModelConvolutionalLayers is training
Epoch: 0, Loss: 1.6173450946807861
Epoch: 1, Loss: 1.3811346292495728
Epoch: 2, Loss: 1.358305811882019
Epoch: 3, Loss: 1.3055436611175537
Epoch: 4, Loss: 1.361341238021

In [141]:
global epochs
epochs = 50

model = ModelDropout()
model = basic_training(model)
test_model(model)

model = ModelConvolutionalLayers()
model = basic_training(model)
test_model(model)
epochs = 10

ModelDropout is training
Epoch: 0, Loss: 3.0776095390319824
Epoch: 1, Loss: 2.9820263385772705
Epoch: 2, Loss: 2.7211365699768066
Epoch: 3, Loss: 2.8907506465911865
Epoch: 4, Loss: 2.7787039279937744
Epoch: 5, Loss: 2.4318625926971436
Epoch: 6, Loss: 2.7590668201446533
Epoch: 7, Loss: 2.548741340637207
Epoch: 8, Loss: 2.252854824066162
Epoch: 9, Loss: 2.676725387573242
Epoch: 10, Loss: 2.727212429046631
Epoch: 11, Loss: 2.5998783111572266
Epoch: 12, Loss: 2.324941873550415
Epoch: 13, Loss: 2.4985804557800293
Epoch: 14, Loss: 2.946016788482666
Epoch: 15, Loss: 2.466244697570801
Epoch: 16, Loss: 2.595196485519409
Epoch: 17, Loss: 2.03120493888855
Epoch: 18, Loss: 1.7425264120101929
Epoch: 19, Loss: 1.8041839599609375
Epoch: 20, Loss: 2.0775558948516846
Epoch: 21, Loss: 1.7552191019058228
Epoch: 22, Loss: 1.8387318849563599
Epoch: 23, Loss: 1.946933388710022
Epoch: 24, Loss: 2.024894952774048
Epoch: 25, Loss: 2.22885799407959
Epoch: 26, Loss: 2.2260210514068604
Epoch: 27, Loss: 2.02610492

Вообще такая проблема возникает при переобучении, поэтому в теории, надо взять и подождать много эпох

Забавно, что 20 эпох достаточно для всего этого...

### Batch normalisation

### Different activation funcs

### Different weigt initialization

### Different optimisers

### Learninig rate scheduler

### Batch size and learning rate