## First variant of CNN
1. Accepts 3 RGB channels (later I decided that grayscale is the way to go)
2. I think it is inefficient because it takes color into consideration, while
  the type of hat depends only on its shape
3. I once trained it to 60% accuracy, but that seems to have been a fluke

In [2]:
import torch
from torch import nn

class HeadgearRecognizer1(nn.Module):
  """First CNN variant: two conv/pool stages on a 3-channel image plus an MLP head.

  Each 3x3 convolution (padding 1) keeps the spatial size and each 2x2 pool
  halves it, so the 32 * 56 * 56 features expected by the first linear layer
  correspond to a 224x224 input. ``forward`` returns log-probabilities over
  20 classes.
  """

  def __init__(self) -> None:
    super().__init__()

    # Convolutional feature extractor: 3 -> 16 -> 32 channels.
    conv_stages = [
      nn.Conv2d(3, 16, kernel_size=3, padding=1),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2),
      nn.Conv2d(16, 32, kernel_size=3, padding=1),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2),
    ]
    # Fully connected head; the last layer emits raw logits (no activation).
    head_layers = [
      nn.Linear(32 * 56 * 56, 128),
      nn.ReLU(),
      nn.Dropout(0.15),
      nn.Linear(128, 64),
      nn.ReLU(),
      nn.Linear(64, 20),
    ]

    self.extractor = nn.Sequential(*conv_stages)
    self.flatten = nn.Flatten()
    self.classifier = nn.Sequential(*head_layers)
    self.softmax = nn.LogSoftmax(dim=1)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Map a batch of images to per-class log-probabilities."""
    features = self.flatten(self.extractor(x))
    logits = self.classifier(features)
    return self.softmax(logits)


## Second variant of CNN
1. Can be trained to just under 60% accuracy
2. I realized that you can actually use ConvUnit(x, x) -- the same number of in_channels and out_channels

In [3]:
from torch import nn

class ConvUnit(nn.Module):
  """Convolution -> ReLU -> max-pool building block.

  Args:
    in_channels: number of channels in the input feature map.
    out_channels: number of channels produced by the convolution.
    conv_kernel: side length of the square convolution kernel.
    pool_kernel: side length of the max-pool window; also used as its stride.
    padding: zero-padding added on each side before the convolution.
      The default of 1 preserves the spatial size only for ``conv_kernel=3``;
      larger kernels shrink the map by ``conv_kernel - 3`` pixels per axis.
      Pass ``conv_kernel // 2`` to keep the size for any odd kernel.
  """

  def __init__(self, in_channels: int, out_channels: int, conv_kernel: int = 3, pool_kernel: int = 2, padding: int = 1) -> None:
    super().__init__()
    self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=conv_kernel, stride=1, padding=padding)
    self.relu = nn.ReLU()
    self.pool = nn.MaxPool2d(kernel_size=pool_kernel, stride=pool_kernel)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Apply convolution, ReLU and max-pooling, in that order."""
    return self.pool(self.relu(self.conv(x)))

class DenseUnit(nn.Module):
  """Linear -> ReLU -> Dropout building block.

  ReLU is always applied, so the output is non-negative; when this unit is
  used as the last layer of a classifier the logits are clamped at zero.

  Args:
    in_features: size of each input sample.
    out_features: size of each output sample.
    dropout: probability of zeroing an activation, in ``[0, 1]``.
      ``0`` (the default) makes the dropout layer a no-op.

  Raises:
    ValueError: from ``nn.Dropout`` if ``dropout`` is outside ``[0, 1]``.
  """

  def __init__(self, in_features: int, out_features: int, dropout: float = 0.0) -> None:
    super().__init__()
    self.linear = nn.Linear(in_features, out_features)
    self.relu = nn.ReLU()
    # Always registered (p=0 is the identity) so forward() needs no
    # hasattr() probing and an out-of-range probability is rejected here.
    self.dropout = nn.Dropout(dropout)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Apply the linear layer, ReLU and dropout, in that order."""
    return self.dropout(self.relu(self.linear(x)))


In [None]:
from torch import nn

class HeadgearRecognizer2(nn.Module):
  """Second CNN variant: four ConvUnit stages on a 1-channel input plus a dense head.

  ``forward`` returns log-probabilities over 20 classes (LogSoftmax on dim 1).
  """

  def __init__(self) -> None:
    super().__init__()
    self.extractor = nn.Sequential(
      ConvUnit(1, 32),
      ConvUnit(32, 64),
      ConvUnit(64, 64),
      ConvUnit(64, 128),
    )
    self.flatten = nn.Flatten()
    self.classifier = nn.Sequential(
      # 25088 = 128 * 14 * 14; presumably a 224x224 input halved by each of
      # the four ConvUnits -- confirm against the data pipeline.
      DenseUnit(25088, 1024),
      DenseUnit(1024, 512, dropout=0.1),
      DenseUnit(512, 256),
      DenseUnit(256, 128, dropout=0.1),
      # Output layer is a plain Linear rather than a DenseUnit: DenseUnit ends
      # in ReLU, which would clamp the logits to >= 0 before LogSoftmax and
      # pass no gradient to a class whose pre-activation is negative.
      # NOTE: these parameters are named classifier.4.{weight,bias}; a
      # checkpoint saved with a DenseUnit head stores classifier.4.linear.*.
      nn.Linear(128, 20),
    )
    self.softmax = nn.LogSoftmax(dim=1)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Map a batch of images to per-class log-probabilities."""
    features = self.flatten(self.extractor(x))
    logits = self.classifier(features)
    return self.softmax(logits)


### Important remark
Once I started using learning_rate = 0.001 and batch_size = 64, my neural networks started learning.
Before that, the loss would plateau at some particular value such as 0.0939, and the accuracy stayed at
~7%.

But the moment I switched to learning_rate = 0.001 and batch_size = 64, everything seems to be working.

## Third variant of CNN
1. Can be trained up to 70% accuracy
2. Has an increased kernel size in the first ConvUnit (which is probably important!)
3. In subsequent networks I will try increasing the kernel size further

In [None]:
from torch import nn

class HeadgearRecognizer3(nn.Module):
  """Third CNN variant: a 7x7 first convolution with 4x4 pooling, then three default ConvUnits.

  Takes a 1-channel input; ``forward`` returns log-probabilities over
  20 classes (LogSoftmax on dim 1).
  """

  def __init__(self) -> None:
    super().__init__()
    self.extractor = nn.Sequential(
      ConvUnit(1, 32, conv_kernel=7, pool_kernel=4),
      ConvUnit(32, 64),
      ConvUnit(64, 128),
      ConvUnit(128, 128),
    )
    self.flatten = nn.Flatten()
    self.classifier = nn.Sequential(
      # 4608 = 128 * 6 * 6; presumably the extractor output for a 224x224
      # input -- confirm against the data pipeline.
      DenseUnit(4608, 512),
      DenseUnit(512, 128, dropout=0.1),
      # Output layer is a plain Linear rather than a DenseUnit: DenseUnit ends
      # in ReLU, which would clamp the logits to >= 0 before LogSoftmax and
      # pass no gradient to a class whose pre-activation is negative.
      # NOTE: these parameters are named classifier.2.{weight,bias}; a
      # checkpoint saved with a DenseUnit head stores classifier.2.linear.*.
      nn.Linear(128, 20),
    )
    self.softmax = nn.LogSoftmax(dim=1)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Map a batch of images to per-class log-probabilities."""
    features = self.flatten(self.extractor(x))
    logits = self.classifier(features)
    return self.softmax(logits)


## Fourth variant of CNN
1. Can be trained up to 75%
2. Increased kernel sizes in the first two ConvUnits

In [None]:
from torch import nn

class HeadgearRecognizer4(nn.Module):
  """Fourth CNN variant: enlarged kernels in the first two ConvUnits (7x7 then 5x5).

  Takes a 1-channel input; ``forward`` returns log-probabilities over
  20 classes (LogSoftmax on dim 1).
  """

  def __init__(self) -> None:
    super().__init__()
    self.extractor = nn.Sequential(
      ConvUnit(1, 32, conv_kernel=7, pool_kernel=5),
      ConvUnit(32, 64, conv_kernel=5, pool_kernel=3),
      ConvUnit(64, 128),
    )
    self.flatten = nn.Flatten()
    self.classifier = nn.Sequential(
      # 6272 = 128 * 7 * 7; presumably the extractor output for a 224x224
      # input -- confirm against the data pipeline.
      DenseUnit(6272, 512),
      DenseUnit(512, 128, dropout=0.25),
      # Output layer is a plain Linear rather than a DenseUnit: DenseUnit ends
      # in ReLU, which would clamp the logits to >= 0 before LogSoftmax and
      # pass no gradient to a class whose pre-activation is negative.
      # NOTE: these parameters are named classifier.2.{weight,bias}; a
      # checkpoint saved with a DenseUnit head stores classifier.2.linear.*.
      nn.Linear(128, 20),
    )
    self.softmax = nn.LogSoftmax(dim=1)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Map a batch of images to per-class log-probabilities."""
    features = self.flatten(self.extractor(x))
    logits = self.classifier(features)
    return self.softmax(logits)


## Fifth variant of CNN

In [None]:
from torch import nn

class HeadgearRecognizer5(nn.Module):
  """Fifth CNN variant: two 7x7 ConvUnits with 3x3 pooling, then two default ConvUnits.

  Takes a 1-channel input; ``forward`` returns log-probabilities over
  20 classes (LogSoftmax on dim 1).
  """

  def __init__(self) -> None:
    super().__init__()
    self.extractor = nn.Sequential(
      ConvUnit(1, 16, conv_kernel=7, pool_kernel=3),
      ConvUnit(16, 32, conv_kernel=7, pool_kernel=3),
      ConvUnit(32, 64),
      ConvUnit(64, 128),
    )
    self.flatten = nn.Flatten()
    self.classifier = nn.Sequential(
      # 3200 = 128 * 5 * 5; presumably the extractor output for a 224x224
      # input -- confirm against the data pipeline.
      DenseUnit(3200, 512),
      DenseUnit(512, 256, dropout=0.25),
      DenseUnit(256, 128, dropout=0.25),
      # Output layer is a plain Linear rather than a DenseUnit: DenseUnit ends
      # in ReLU, which would clamp the logits to >= 0 before LogSoftmax and
      # pass no gradient to a class whose pre-activation is negative.
      # NOTE: these parameters are named classifier.3.{weight,bias}; a
      # checkpoint saved with a DenseUnit head stores classifier.3.linear.*.
      nn.Linear(128, 20),
    )
    self.softmax = nn.LogSoftmax(dim=1)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Map a batch of images to per-class log-probabilities."""
    features = self.flatten(self.extractor(x))
    logits = self.classifier(features)
    return self.softmax(logits)


### 6th variant of CNN
1. I started adding batch normalization, and I think it worked out
2. I am using 3 channels for RGB colors, whereas previous iterations used a single channel of grayscale values

In [None]:
from torch import nn

class ConvUnit(nn.Module):
  """Convolution -> ReLU -> optional BatchNorm2d -> max-pool building block.

  Args:
    in_channels: number of channels in the input feature map.
    out_channels: number of channels produced by the convolution.
    conv_kernel: side length of the square convolution kernel.
    pool_kernel: side length of the max-pool window; also used as its stride.
    normalization: when True, a ``BatchNorm2d`` over ``out_channels`` is
      applied after the ReLU; when False no ``batch_norm`` submodule exists.
    padding: zero-padding added on each side before the convolution.
      The default of 1 preserves the spatial size only for ``conv_kernel=3``;
      larger kernels shrink the map by ``conv_kernel - 3`` pixels per axis.
      Pass ``conv_kernel // 2`` to keep the size for any odd kernel.
  """

  def __init__(self, in_channels: int, out_channels: int, conv_kernel: int = 3, pool_kernel: int = 2, normalization: bool = True, padding: int = 1) -> None:
    super().__init__()
    self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=conv_kernel, stride=1, padding=padding)
    self.relu = nn.ReLU()
    self.normalize = normalization
    if normalization:
      self.batch_norm = nn.BatchNorm2d(out_channels)
    self.pool = nn.MaxPool2d(kernel_size=pool_kernel, stride=pool_kernel)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Apply convolution, ReLU, batch norm (if enabled) and max-pooling."""
    x = self.relu(self.conv(x))
    if self.normalize:
      # Normalization is applied to the post-activation feature map.
      x = self.batch_norm(x)
    return self.pool(x)

class DenseUnit(nn.Module):
  """Linear -> ReLU -> optional BatchNorm1d -> Dropout building block.

  ReLU is always applied, so with ``normalization=False`` the output is
  non-negative; when this unit is used as the last layer of a classifier the
  logits are clamped at zero.

  Args:
    in_features: size of each input sample.
    out_features: size of each output sample.
    dropout: probability of zeroing an activation, in ``[0, 1]``.
      ``0`` (the default) makes the dropout layer a no-op.
    normalization: when True, a ``BatchNorm1d`` over ``out_features`` is
      applied after the ReLU; when False no ``batch_norm`` submodule exists.

  Raises:
    ValueError: from ``nn.Dropout`` if ``dropout`` is outside ``[0, 1]``.
  """

  def __init__(self, in_features: int, out_features: int, dropout: float = 0.0, normalization: bool = True) -> None:
    super().__init__()
    self.linear = nn.Linear(in_features, out_features)
    self.relu = nn.ReLU()
    self.normalize = normalization
    if normalization:
      self.batch_norm = nn.BatchNorm1d(out_features)
    self.dropout = nn.Dropout(dropout)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Apply the linear layer, ReLU, batch norm (if enabled) and dropout."""
    x = self.relu(self.linear(x))
    if self.normalize:
      # Normalization is applied to the post-activation features.
      x = self.batch_norm(x)
    return self.dropout(x)


In [None]:
from torch import nn

class HeadgearRecognizer6(nn.Module):
  """Sixth CNN variant: three ConvUnit stages on a 3-channel input plus a dense head.

  ``forward`` returns log-probabilities over 20 classes (LogSoftmax on dim 1).
  """

  def __init__(self) -> None:
    super().__init__()
    self.extractor = nn.Sequential(
      ConvUnit(3, 32, conv_kernel=5, pool_kernel=3),
      ConvUnit(32, 64),
      ConvUnit(64, 128),
    )
    self.flatten = nn.Flatten()
    self.classifier = nn.Sequential(
      # 41472 = 128 * 18 * 18; presumably the extractor output for a 224x224
      # input -- confirm against the data pipeline.
      DenseUnit(41472, 1024),
      DenseUnit(1024, 512, dropout=0.10),
      DenseUnit(512, 256, dropout=0.25),
      # Output layer is a plain Linear rather than a DenseUnit: even with
      # normalization disabled, DenseUnit ends in ReLU, which would clamp the
      # logits to >= 0 before LogSoftmax and pass no gradient to a class
      # whose pre-activation is negative.
      # NOTE: these parameters are named classifier.3.{weight,bias}; a
      # checkpoint saved with a DenseUnit head stores classifier.3.linear.*.
      nn.Linear(256, 20),
    )
    self.softmax = nn.LogSoftmax(dim=1)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Map a batch of images to per-class log-probabilities."""
    features = self.flatten(self.extractor(x))
    logits = self.classifier(features)
    return self.softmax(logits)


### 7th attempt to build a CNN

In [None]:
from torch import nn

class HeadgearRecognizer7(nn.Module):
  """Seventh CNN variant: four ConvUnit stages (up to 256 channels) on a 3-channel input.

  ``forward`` returns log-probabilities over 20 classes (LogSoftmax on dim 1).
  """

  def __init__(self) -> None:
    super().__init__()
    self.extractor = nn.Sequential(
      ConvUnit(3, 32, conv_kernel=5, pool_kernel=3),
      ConvUnit(32, 64),
      ConvUnit(64, 128),
      ConvUnit(128, 256),
    )
    self.flatten = nn.Flatten()
    self.classifier = nn.Sequential(
      # 20736 = 256 * 9 * 9; presumably the extractor output for a 224x224
      # input -- confirm against the data pipeline.
      DenseUnit(20736, 1024),
      DenseUnit(1024, 512, dropout=0.25),
      DenseUnit(512, 256, dropout=0.25),
      # Output layer is a plain Linear rather than a DenseUnit: even with
      # normalization disabled, DenseUnit ends in ReLU, which would clamp the
      # logits to >= 0 before LogSoftmax and pass no gradient to a class
      # whose pre-activation is negative.
      # NOTE: these parameters are named classifier.3.{weight,bias}; a
      # checkpoint saved with a DenseUnit head stores classifier.3.linear.*.
      nn.Linear(256, 20),
    )
    self.softmax = nn.LogSoftmax(dim=1)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Map a batch of images to per-class log-probabilities."""
    features = self.flatten(self.extractor(x))
    logits = self.classifier(features)
    return self.softmax(logits)
