In [239]:
import torch
import torchvision
from torch import nn

import os
from matplotlib import pyplot as plt
import cv2
import imghdr
import numpy as np

In [240]:
DATA_DIR = 'data'
IMG_EXT = ['png', 'jpg', 'jpeg', 'bmp']

In [241]:
# Data cleanup

In [242]:
for img_class in os.listdir(DATA_DIR):
    for image in os.listdir(os.path.join(DATA_DIR, img_class)):
        image_path = os.path.join(DATA_DIR, img_class, image)
        try:
            img = cv2.imread(image_path)
            tip = imghdr.what(image_path)
            if tip not in IMG_EXT:
                os.remove(image_path)
        except Exception as e:
            print(f'Issue with {image_path}')

In [243]:
# Datasets

In [244]:
transform = torchvision.transforms.Compose([torchvision.transforms.Resize((255, 255)),
                                            torchvision.transforms.ToTensor()])

In [245]:
data = torchvision.datasets.ImageFolder(DATA_DIR, transform=transform)

In [246]:
# Dataloader

In [247]:
dataloader = torch.utils.data.DataLoader(dataset=data,
                                         batch_size=23,
                                         shuffle=True)

In [248]:
# x, y = next(iter(dataloader))

In [249]:
gen1 = torch.Generator().manual_seed(69)
train_set, test_set, val_set = torch.utils.data.random_split(dataloader, [0.7, 0.15, 0.15], generator=gen1)

In [255]:
print(len(train_set))
print(len(test_set))
print(len(val_set))

14
3
3


In [228]:
# x, y = next(a)

In [256]:
x.shape, y.shape

(torch.Size([23, 3, 255, 255]), torch.Size([23]))

In [322]:
class Model(nn.Module):
    
    def __init__(self):
        super().__init__()
        
        self.conv1_block = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=(3, 3)),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        
        self.conv2_block = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=(3, 3)),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        
        self.conv3_block = nn.Sequential(
            nn.Conv2d(32, 16, kernel_size=(3, 3)),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        
        self.flatten = nn.Flatten(start_dim=1, end_dim=-1)
        
        self.dense_block = nn.Sequential(
            nn.Linear(16 * 30 * 30 , 10),
            nn.ReLU()
        )
        
        self.output = nn.Sequential(
            nn.Linear(10, 1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        x = self.conv1_block(x)
        x = self.conv2_block(x)
        x = self.conv3_block(x)
        x = self.flatten(x)
        x = self.dense_block(x)
        return self.output(x).reshape((23))
        
        

In [323]:
model = Model()

In [324]:
 # Optimizer and loss
optimizer = torch.optim.Adam(model.parameters())
loss_fn = nn.BCELoss()

In [325]:
print(model)

Model(
  (conv1_block): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2_block): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3_block): Sequential(
    (0): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (dense_block): Sequential(
    (0): Linear(in_features=14400, out_features=10, bias=True)
    (1): ReLU()
  )
  (output): Sequential(
    (0): Linear(in_features=10, out_features=1, bias=True)
    (1): Sigmoid()
  )
)


In [326]:
# Train

In [331]:
def train_nn(epochs, net : torch.nn.Module, 
             train_set : torch.utils.data.DataLoader,
             val_set : torch.utils.data.DataLoader) -> (torch.nn.Module, list, list):
    
    net.train()                                              # Set the NN to training mode.
    
    epoch_count, train_loss_vals, val_loss_vals = [], [], [] # List that will be used to check model performance
    
    for epoch in range(1, epochs+1):
        for x_batch, y_batch in train_set.dataset:           # Joint the corresponding batches and unwrap them
            y_batch = y_batch.type(torch.float32)
            y_pred = net(x_batch)                            # Make predictions for the current batch
            loss = loss_fn(y_pred, y_batch)                  # Compute the loss of the predictions
            optimizer.zero_grad()                            # Reset the gradients
            loss.backward()                                  # Perform the backpropagation
            optimizer.step()                                 # Optimize the model
            
        for data, labels in val_set.dataset:
            labels = labels.type(torch.float32)
            preds = net(data)
            val_loss = loss_fn(preds, labels)
    
        # if epoch % 10 == 0:
        epoch_count.append(epoch)                            
        train_loss_vals.append(loss)
        print(f"Epoch {epoch}: Train loss of {loss}; Validation loss of {val_loss}")
        # print(net.state_dict())                            # Bad idea, the state_dict is huge
            
    return net, epoch_count, train_loss_vals, val_loss_vals

In [329]:
trained_model, epoch_l, train_loss_l, val_loss_l = train_nn(40, model, train_set, val_set)

Epoch 0: Train loss of 0.2499009370803833; Validation loss of 0.2870894968509674
Epoch 1: Train loss of 0.32643401622772217; Validation loss of 0.3261356055736542
Epoch 2: Train loss of 0.19381608068943024; Validation loss of 0.2981284260749817
Epoch 3: Train loss of 0.23232491314411163; Validation loss of 0.27362969517707825
Epoch 4: Train loss of 0.23079146444797516; Validation loss of 0.2524832785129547
Epoch 5: Train loss of 0.307251513004303; Validation loss of 0.1434963196516037
Epoch 6: Train loss of 0.2229301482439041; Validation loss of 0.24234504997730255
Epoch 7: Train loss of 0.2980499267578125; Validation loss of 0.17894232273101807
Epoch 8: Train loss of 0.2740064561367035; Validation loss of 0.21621140837669373
Epoch 9: Train loss of 0.23137404024600983; Validation loss of 0.27588745951652527
Epoch 10: Train loss of 0.2471102476119995; Validation loss of 0.2658098340034485
Epoch 11: Train loss of 0.16848936676979065; Validation loss of 0.17722778022289276
Epoch 12: Train

ValueError: not enough values to unpack (expected 4, got 3)