# Custom CNN classifier
Try to build a custom CNN classifier for the dataset (using torch.nn)

### Dataloaders

In [1]:
import os
import torch
import torchvision

from pathlib import Path
from dotenv import load_dotenv

BATCH_SIZE = 8
LOADER_WORKERS = 8

load_dotenv()
root_data = os.getenv("KAGGLE_FILES_DIR")
dataset_path = Path(os.getcwd(), "..", root_data)
processed = Path(dataset_path, 'processed')


transformations = torchvision.transforms.Compose([
    torchvision.transforms.Resize((256, 256)),
    torchvision.transforms.ToTensor(),
])

train_dataset = torchvision.datasets.ImageFolder(root=str(Path(processed, 'train')), transform=transformations)
val_dataset = torchvision.datasets.ImageFolder(root=str(Path(processed, 'val')), transform=transformations)
test_dataset = torchvision.datasets.ImageFolder(root=str(Path(processed, 'test')), transform=transformations)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=LOADER_WORKERS)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=LOADER_WORKERS)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=LOADER_WORKERS)


### Size of datasets

In [2]:
print(f"Train dataset size: {len(train_dataset)}")
print(f"Validation dataset size: {len(val_dataset)}")
print(f"Test dataset size: {len(test_dataset)}")

Train dataset size: 64654
Validation dataset size: 18472
Test dataset size: 9238


### Define CNN classifier
Using CNN with 3 convolutional layers and 2 fully connected layers.

In [3]:
import torch
import torch.nn as nn


class CNN(nn.Module):
    def __init__(self, input_size: torch.Size, initial_filters: int, out_classes: int, dropout: float = 0.25, device: str = "cpu", ):
        super(CNN, self).__init__()
        channels, _, _ = input_size
        self.device = device
        self.conv = nn.Sequential(
            nn.Conv2d(channels, initial_filters, kernel_size=3, stride=1, padding=1, device=self.device),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(initial_filters, initial_filters * 2, kernel_size=3, stride=1, padding=1, device=self.device),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(initial_filters * 2, initial_filters * 4, kernel_size=3, stride=1, padding=1, device=self.device),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )
        self.perceptron = nn.Sequential(
            nn.Linear(self._get_conv_out_shape(input_size), initial_filters * 8, device=self.device),
            nn.Dropout(dropout),
            nn.Linear(initial_filters * 8, out_classes, device=self.device),
        )
    
    def forward(self, x: torch.Tensor):
        x = x.to(self.device)
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        x = self.perceptron(x)
        return x
    
    def _get_conv_out_shape(self, input_size: torch.Size):
        with torch.no_grad():
            zeros = torch.zeros(*input_size, device=self.device)
            z = self.conv(zeros)
            z = torch.prod(torch.tensor(z.shape))
        return z


### Early stopper

In [4]:
class EarlyStopper:
    def __init__(self, patience: int = 5, min_delta: float = 0.001):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = float('inf')

    def early_stop(self, validation_loss):
        if validation_loss < self.min_validation_loss:
            self.min_validation_loss = validation_loss
            self.counter = 0
        elif validation_loss > (self.min_validation_loss + self.min_delta):
            self.counter += 1
            if self.counter >= self.patience:
                return True
        return False

### Evaluate function
For evaluation of the model, I will use accuracy and cross-entropy loss.

In [5]:
import numpy as np

from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

def evaluate(
        model: nn.Module, 
        valid_loader: torch.utils.data.DataLoader, 
        loss_func: nn.Module, 
        epoch_no: int, 
        writer: SummaryWriter
):    
    model.eval()
    epoch_loss = 0
    correct_class = 0
    targets_list = []
    preds_list = []
    
    dataset_size = len(valid_loader.dataset)
    
    with torch.no_grad():
        device = model.device
        for inputs, targets in tqdm(valid_loader, desc="Evaluation: "):
            inputs = inputs.to(device)
            targets = targets.to(device)
            output = model(inputs)
            pred_class = torch.argmax(output, dim=1)
            correct_class += (pred_class == targets).sum()
            loss = loss_func(output, targets)
            epoch_loss += loss.item() * inputs.size(0)
            
            # Count the number of targets and predictions
            targets_list.append(targets.cpu().numpy())
            preds_list.append(pred_class.cpu().numpy())
    
    avg_epoch_loss = epoch_loss / dataset_size
    accuracy = correct_class / dataset_size
    
    targets_np = np.concatenate(targets_list)
    preds_np = np.concatenate(preds_list)

    confusion_mat = confusion_matrix(targets_np, preds_np)
    precision = precision_score(targets_np, preds_np)
    recall = recall_score(targets_np, preds_np)
    f1 = f1_score(targets_np, preds_np)
    roc_auc = roc_auc_score(targets_np, preds_np)
    
    print(f"""Epoch: {epoch_no}
        Average epoch loss: {avg_epoch_loss}
        Confusion matrix: {confusion_mat}
        Accuracy: {accuracy}
        Precision: {precision:}
        Recall: {recall}
        F1: {f1}
        ROC AUC: {roc_auc}""")

    writer.add_scalar('Loss/train', avg_epoch_loss, epoch_no)
    writer.add_scalar('Accuracy/train', accuracy, epoch_no)
    writer.add_scalar('Precision/train', precision, epoch_no)
    writer.add_scalar('Recall/train', recall, epoch_no)
    writer.add_scalar('F1/train', f1, epoch_no)
    writer.add_scalar('ROC AUC/train', roc_auc, epoch_no)
    
    
    return avg_epoch_loss, accuracy, precision, recall, f1, roc_auc, confusion_mat

### Train function


In [6]:
import torch.optim as optim

def train(
        model: nn.Module, 
        train_loader: torch.utils.data.DataLoader,
        valid_loader: torch.utils.data.DataLoader,
        max_epochs: int,
        optimizer: optim.Optimizer, 
        loss_func: nn.Module,
        patience: int = 3,
        min_delta: float = 0.001,
):
    
    writer = SummaryWriter('../runs/baseline_lr01_delta001_100eps')
    device = model.device
    early_stopping = EarlyStopper(patience=patience, min_delta=min_delta)
    best_avg_loss = float('inf')
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)
    for epoch in range(1, max_epochs + 1):
        model.train()
        for inputs, targets in tqdm(train_loader, desc=f"Train epoch {epoch}: "):
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            predicted = model(inputs)
            loss = loss_func(predicted, targets)
            loss.backward()
            optimizer.step()
        
        avg_epoch_loss, accuracy, precision, recall, f1, roc_auc, confusion_mat = evaluate(model, valid_loader, loss_func, epoch, writer)
        
        lr_scheduler.step()
        
        if avg_epoch_loss < best_avg_loss:
            best_avg_loss = avg_epoch_loss
            torch.save(model.state_dict(), "../models/best_model_100eps.pt")
            print(f"Model saved on epoch {epoch}")
            
        if early_stopping.early_stop(avg_epoch_loss):
            print(f'''Early stopping on epoch {epoch}
            Validation loss: {avg_epoch_loss}
            Accuracy: {accuracy}
            Confusion Matrix: {confusion_mat}
            Precision: {precision}
            Recall: {recall}
            F1: {f1}
            ROC AUC: {roc_auc}''')
            break
    
    writer.close()


### Define training parameters

In [7]:
image, label = train_dataset[0]

DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"

model_params = {
    "input_size": image.shape,
    "initial_filters": 8,
    "out_classes": len(train_dataset.classes),
    "device": DEVICE
}
cnn = CNN(**model_params)

loss_func = nn.CrossEntropyLoss()
optimizer = optim.SGD(cnn.parameters(), lr=0.01)

n_epochs = 100

## Train custom model - establish baseline

In [8]:
train(cnn, train_loader, val_loader, n_epochs, optimizer, loss_func)

Train epoch 1: 100%|██████████| 8082/8082 [02:46<00:00, 48.53it/s]
Evaluation: 100%|██████████| 2309/2309 [01:20<00:00, 28.76it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch: 1
        Average epoch loss: 0.579197199068941
        Confusion matrix: [[13429     0]
 [ 5043     0]]
        Accuracy: 0.7269921898841858
        Precision: 0.0
        Recall: 0.0
        F1: 0.0
        ROC AUC: 0.5
Model saved on epoch 1


Train epoch 2: 100%|██████████| 8082/8082 [02:33<00:00, 52.68it/s]
Evaluation: 100%|██████████| 2309/2309 [01:16<00:00, 30.01it/s]


Epoch: 2
        Average epoch loss: 0.5708402025074916
        Confusion matrix: [[13236   193]
 [ 4668   375]]
        Accuracy: 0.7368449568748474
        Precision: 0.6602112676056338
        Recall: 0.074360499702558
        F1: 0.13366601318837998
        ROC AUC: 0.5299943089770516
Model saved on epoch 2


Train epoch 3: 100%|██████████| 8082/8082 [02:32<00:00, 52.91it/s]
Evaluation: 100%|██████████| 2309/2309 [01:17<00:00, 29.92it/s]


Epoch: 3
        Average epoch loss: 0.5648014833460953
        Confusion matrix: [[13418    11]
 [ 4773   270]]
        Accuracy: 0.7410134077072144
        Precision: 0.9608540925266904
        Recall: 0.05353955978584176
        F1: 0.10142749812171299
        ROC AUC: 0.5263602184959442
Model saved on epoch 3


Train epoch 4: 100%|██████████| 8082/8082 [02:32<00:00, 52.93it/s]
Evaluation: 100%|██████████| 2309/2309 [01:16<00:00, 30.06it/s]


Epoch: 4
        Average epoch loss: 0.5668822142431467
        Confusion matrix: [[13270   159]
 [ 4640   403]]
        Accuracy: 0.7402014136314392
        Precision: 0.7170818505338078
        Recall: 0.07991275034701567
        F1: 0.14380017841213202
        ROC AUC: 0.5340363513444811


Train epoch 5: 100%|██████████| 8082/8082 [02:33<00:00, 52.74it/s]
Evaluation: 100%|██████████| 2309/2309 [01:16<00:00, 30.13it/s]


Epoch: 5
        Average epoch loss: 0.5550422505247691
        Confusion matrix: [[13385    44]
 [ 4711   332]]
        Accuracy: 0.7425833940505981
        Precision: 0.8829787234042553
        Recall: 0.06583382906999802
        F1: 0.12253183244140986
        ROC AUC: 0.5312786689470923
Model saved on epoch 5


Train epoch 6: 100%|██████████| 8082/8082 [02:32<00:00, 53.06it/s]
Evaluation: 100%|██████████| 2309/2309 [01:16<00:00, 30.00it/s]


Epoch: 6
        Average epoch loss: 0.5535723947959836
        Confusion matrix: [[13377    52]
 [ 4678   365]]
        Accuracy: 0.7439367771148682
        Precision: 0.8752997601918465
        Recall: 0.07237755304382312
        F1: 0.1336996336996337
        ROC AUC: 0.5342526680998398
Model saved on epoch 6


Train epoch 7: 100%|██████████| 8082/8082 [02:39<00:00, 50.59it/s]
Evaluation: 100%|██████████| 2309/2309 [01:18<00:00, 29.43it/s]


Epoch: 7
        Average epoch loss: 0.5491353556830009
        Confusion matrix: [[13363    66]
 [ 4680   363]]
        Accuracy: 0.7430706024169922
        Precision: 0.8461538461538461
        Recall: 0.07198096371207614
        F1: 0.13267543859649122
        ROC AUC: 0.5335331134741779
Model saved on epoch 7


Train epoch 8: 100%|██████████| 8082/8082 [02:34<00:00, 52.47it/s]
Evaluation: 100%|██████████| 2309/2309 [01:18<00:00, 29.41it/s]


Epoch: 8
        Average epoch loss: 0.5501335976107289
        Confusion matrix: [[13373    56]
 [ 4674   369]]
        Accuracy: 0.7439367771148682
        Precision: 0.8682352941176471
        Recall: 0.07317073170731707
        F1: 0.13496708119970738
        ROC AUC: 0.5345003260145044


Train epoch 9: 100%|██████████| 8082/8082 [02:33<00:00, 52.59it/s]
Evaluation: 100%|██████████| 2309/2309 [01:16<00:00, 30.00it/s]


Epoch: 9
        Average epoch loss: 0.5690172168826737
        Confusion matrix: [[13396    33]
 [ 4689   354]]
        Accuracy: 0.7443698644638062
        Precision: 0.9147286821705426
        Recall: 0.07019631171921475
        F1: 0.13038674033149172
        ROC AUC: 0.5338694716686773


Train epoch 10: 100%|██████████| 8082/8082 [02:31<00:00, 53.40it/s]
Evaluation: 100%|██████████| 2309/2309 [01:17<00:00, 29.75it/s]


Epoch: 10
        Average epoch loss: 0.5531532375481595
        Confusion matrix: [[13373    56]
 [ 4670   373]]
        Accuracy: 0.7441533207893372
        Precision: 0.8694638694638694
        Recall: 0.07396391037081103
        F1: 0.13633040935672514
        ROC AUC: 0.5348969153462514


Train epoch 11: 100%|██████████| 8082/8082 [02:31<00:00, 53.43it/s]
Evaluation: 100%|██████████| 2309/2309 [01:17<00:00, 29.92it/s]

Epoch: 11
        Average epoch loss: 0.5515866273922959
        Confusion matrix: [[13364    65]
 [ 4677   366]]
        Accuracy: 0.7432871460914612
        Precision: 0.8491879350348028
        Recall: 0.0725758477096966
        F1: 0.13372305443916696
        ROC AUC: 0.5338677883272589
Early stopping on epoch 11
            Validation loss: 0.5515866273922959
            Accuracy: 0.7432871460914612
            Confusion Matrix: [[13364    65]
 [ 4677   366]]
            Precision: 0.8491879350348028
            Recall: 0.0725758477096966
            F1: 0.13372305443916696
            ROC AUC: 0.5338677883272589





## Baseline

#### Final accuracy is 74% - just a bit better, than the random guess.

- CNN model:
    - 3 convolutional layers, each with padding 1, kernel size 3, stride 1
        - ReLU activation function after each convolutional layer
        - max pooling after each convolutional layer, kernel size 2
        - 8 initial filters
    - 2 fully connected layers
    - Dropout of 0.25 between FC layers
- Images: 
    - rezised to 256x256 
    - no augmentation
    - no normalization
    - no grayscale conversion
    - 3 channels
    - 70%/20%/10% split
- Dataloaders:
    - Batch size of 8
    - 8 workers
    - Shuffled
    - No pin memory
    - No drop last
Training and validation:
    - use GPU available (CUDA or MPS)
    - max 100 epochs
    - Learning rate of 0.01
    - Early stopping with patience of 3 epochs and minimum delta of 0.001
    - Loss function: Cross-entropy
    - Optimizer: SGD
    - Scheduler: StepLR with step size of 20 and gamma of 0.1