### Some preparations before we begin (or begging...)

In [15]:
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118


import torch
from torch import nn
import torchvision
import os
import pandas as pd
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
import numpy as np

print(torch.cuda.is_available())

def collate_fn(batch):
    images, labels = [], []

    for item in batch:
        images.append(item[0])
        labels.append(item[1])
    images = np.stack(images)
    images = torch.from_numpy(images).float()
    labels = torch.tensor(labels)
    return images, labels


# some config vars
batch_size = 32 * 2
num_classes = 10
epochs = 10
learning_rate = 0.0001
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# sample data
need_download = not os.path.exists('data')
train = torchvision.datasets.CIFAR10(root='data', train=True, download=need_download, transform=ToTensor())
dataset = DataLoader(train, batch_size=batch_size, shuffle=True)
test = torchvision.datasets.CIFAR10(root='data', train=False, download=need_download, transform=ToTensor())
dataset_test = DataLoader(test, batch_size=batch_size, shuffle=True)


(train, dataset)

True


(Dataset CIFAR10
     Number of datapoints: 50000
     Root location: data
     Split: Train
     StandardTransform
 Transform: ToTensor(),
 <torch.utils.data.dataloader.DataLoader at 0x200bd5f9db0>)

In [7]:
# simple function for training and calculating loss
def basic_training(model, allow_print=True) -> nn.Module:
    if allow_print:
        print(f"{model.__class__.__name__} is training")

    model.to(device)
    opt = torch.optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        model.train()
        for batch in dataset:
            data, target = batch
            data, target = data.to(device), target.to(device)
            opt.zero_grad()
            output = model(data)
            loss = loss_fn(output, target)
            loss.backward()
            opt.step()

        if allow_print:
            print(f"Epoch: {epoch}, Loss: {loss.item()}")
        
    if allow_print:
        print("Training done")
    
    return model


In [2]:
# check the accuracy on the random data from the dataset
def test_model(model):
    model.to(device)

    # test the model on data form dataset_test
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in dataset_test:
            data, target = batch
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    print(f"Accuracy: {100 * correct / total}% {correct}/{total}")

### Model with only fully connected layers

In [16]:
# make a model only with fully connected layers
class DenseModel(torch.nn.Module):
    def __init__(self):
        super(DenseModel, self).__init__()

        # that piece of crap almost ruined my day
        IMG_SIZE = 32 * 32 * 3

        self.fc1 = nn.Linear(IMG_SIZE, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)
        
    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x


model = DenseModel()
# model
model = basic_training(model)
test_model(model)

DenseModel is training
Epoch: 0, Loss: 1.8294130563735962
Epoch: 1, Loss: 1.9834905862808228
Epoch: 2, Loss: 1.406627893447876
Epoch: 3, Loss: 1.7146100997924805
Epoch: 4, Loss: 1.5480809211730957
Epoch: 5, Loss: 1.71500563621521
Epoch: 6, Loss: 1.4947694540023804
Epoch: 7, Loss: 1.498091220855713
Epoch: 8, Loss: 1.964679479598999
Epoch: 9, Loss: 1.5170519351959229
Epoch: 10, Loss: 1.048494577407837
Epoch: 11, Loss: 1.5307738780975342
Epoch: 12, Loss: 1.4406089782714844
Epoch: 13, Loss: 1.601004719734192
Epoch: 14, Loss: 1.5936388969421387
Epoch: 15, Loss: 1.5161917209625244
Epoch: 16, Loss: 1.7282209396362305
Epoch: 17, Loss: 1.9453601837158203
Epoch: 18, Loss: 1.2604072093963623
Epoch: 19, Loss: 1.4299336671829224
Epoch: 20, Loss: 1.9541850090026855
Epoch: 21, Loss: 1.5568022727966309
Epoch: 22, Loss: 1.186700701713562
Epoch: 23, Loss: 1.5579442977905273
Epoch: 24, Loss: 0.9613252282142639
Epoch: 25, Loss: 1.2199445962905884
Epoch: 26, Loss: 1.0897176265716553
Epoch: 27, Loss: 1.4703

### Model with convolutional layers

In [139]:
# adding convolutional layers to the model
class ModelConvolutionalLayers(torch.nn.Module):
    def __init__(self):
        super(ModelConvolutionalLayers, self).__init__()

        # if I write it with my hands, it doen't work, but when whappening in sequential, it works
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.fc = nn.Linear(4 * 4 * 128, num_classes)

    def forward(self, x):
        x = self.model(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
"""
model = ModelConvolutionalLayers()
basic_training(model)
test_model(model)
"""


'\nmodel = ModelConvolutionalLayers()\nbasic_training(model)\ntest_model(model)\n'

### Comparing different number of layers

In [124]:
# model with only 1 layer
class ModelSmallAmountLayers(torch.nn.Module):
    def __init__(self):
        super(ModelSmallAmountLayers, self).__init__()

        self.fc = nn.Linear(32 * 32 * 3, num_classes)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x
"""
model = ModelSmallAmountLayers()
print("ModelSmallAmountLayers is training")
basic_training(model, allow_print=False)
print("accuracy of model with only 1 layer")
test_model(model)

model = ModelFullyConnected()
print("ModelFullyConnected is training")
basic_training(model, allow_print=False)
print("accuracy of model with fully connected layers")
test_model(model)
"""

ModelSmallAmountLayers is training
accuracy of model with only 1 layer
Accuracy: 28.84% 2884/10000
ModelFullyConnected is training
accuracy of model with fully connected layers
Accuracy: 33.62% 3362/10000


Let's pretend that ModelConvolutionalLayers in model with big amount of layers and compare them together
as we can see from the output the model with only 1 layer is shifting much more than the model with 4 layers
also we can see that the accuracy of such model is lower

to be shorter, the more layers neural network has, the more complex objects it can detect, but the time (or resurses) for it's 
training would be increased

### Dropout

In [140]:
# adding dropout layer

global epochs
epochs = 20

class ModelDropout(torch.nn.Module):

    def __init__(self):
        super(ModelDropout, self).__init__()

        self.model = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(0.2),
            nn.Flatten(),
            nn.Linear(4 * 4 * 128, 128),
            nn.ReLU(),
        )

        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.model(x)
        return x

        

model = ModelDropout()
model = basic_training(model)
print("Accuracy with dropout")
test_model(model)

model = ModelConvolutionalLayers()
model = basic_training(model)
test_model(model)

epochs = 10

ModelDropout is training
Epoch: 0, Loss: 2.199138879776001
Epoch: 1, Loss: 2.0977420806884766
Epoch: 2, Loss: 1.4756028652191162
Epoch: 3, Loss: 1.3236998319625854
Epoch: 4, Loss: 2.085686445236206
Epoch: 5, Loss: 1.5807256698608398
Epoch: 6, Loss: 1.7575302124023438
Epoch: 7, Loss: 1.5117683410644531
Epoch: 8, Loss: 1.4893335103988647
Epoch: 9, Loss: 1.7689285278320312
Epoch: 10, Loss: 1.8285218477249146
Epoch: 11, Loss: 1.6035689115524292
Epoch: 12, Loss: 1.658751368522644
Epoch: 13, Loss: 1.3808714151382446
Epoch: 14, Loss: 1.8599021434783936
Epoch: 15, Loss: 1.5263110399246216
Epoch: 16, Loss: 1.829563856124878
Epoch: 17, Loss: 1.935354471206665
Epoch: 18, Loss: 1.7004787921905518
Epoch: 19, Loss: 1.7640712261199951
Training done
Accuracy with dropout
Accuracy: 57.07% 5707/10000
ModelConvolutionalLayers is training
Epoch: 0, Loss: 1.6173450946807861
Epoch: 1, Loss: 1.3811346292495728
Epoch: 2, Loss: 1.358305811882019
Epoch: 3, Loss: 1.3055436611175537
Epoch: 4, Loss: 1.361341238021

In [141]:
global epochs
epochs = 50

model = ModelDropout()
model = basic_training(model)
test_model(model)

model = ModelConvolutionalLayers()
model = basic_training(model)
test_model(model)
epochs = 10

ModelDropout is training
Epoch: 0, Loss: 3.0776095390319824
Epoch: 1, Loss: 2.9820263385772705
Epoch: 2, Loss: 2.7211365699768066
Epoch: 3, Loss: 2.8907506465911865
Epoch: 4, Loss: 2.7787039279937744
Epoch: 5, Loss: 2.4318625926971436
Epoch: 6, Loss: 2.7590668201446533
Epoch: 7, Loss: 2.548741340637207
Epoch: 8, Loss: 2.252854824066162
Epoch: 9, Loss: 2.676725387573242
Epoch: 10, Loss: 2.727212429046631
Epoch: 11, Loss: 2.5998783111572266
Epoch: 12, Loss: 2.324941873550415
Epoch: 13, Loss: 2.4985804557800293
Epoch: 14, Loss: 2.946016788482666
Epoch: 15, Loss: 2.466244697570801
Epoch: 16, Loss: 2.595196485519409
Epoch: 17, Loss: 2.03120493888855
Epoch: 18, Loss: 1.7425264120101929
Epoch: 19, Loss: 1.8041839599609375
Epoch: 20, Loss: 2.0775558948516846
Epoch: 21, Loss: 1.7552191019058228
Epoch: 22, Loss: 1.8387318849563599
Epoch: 23, Loss: 1.946933388710022
Epoch: 24, Loss: 2.024894952774048
Epoch: 25, Loss: 2.22885799407959
Epoch: 26, Loss: 2.2260210514068604
Epoch: 27, Loss: 2.02610492

Вообще такая проблема возникает при переобучении, поэтому в теории, надо взять и подождать много эпох

Забавно, что 20 эпох достаточно для всего этого...

### Batch normalisation

### Different activation funcs

### Different weigt initialization

### Different optimisers

### Learninig rate scheduler

### Batch size and learning rate