# Convolutional Neural Network for Horse-Human Binary Classification

This Jupyter Notebook builds a binary classifier in PyTorch using the "$\textit{horse or human}$" dataset for training.

First we import the necessary packages. Three non-standard packages used are $\texttt{Pytorch}$, $\texttt{NumPy}$ and $\texttt{OpenCV}$.

In [1]:
import torch
from torch import nn, optim
from torch.optim.optimizer import Optimizer
from torch.utils.tensorboard import SummaryWriter
import glob
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from torch.nn import functional as F
from typing import Union, NamedTuple, List, Set, Dict, Tuple, Optional

import cv2
import time
from copy import deepcopy
import numpy as np

batch_size = 64

The train and validation images are available for download from https://laurencemoroney.com/datasets.html. Uncomment the lines below if the dataset has not been unzipped yet.

In [2]:
# import os
# import zipfile

# local_zip = 'horse-or-human.zip'
# zip_ref = zipfile.ZipFile(local_zip, 'r')
# zip_ref.extractall('horse-or-human')
# zip_ref.close()
# local_zip = 'validation-horse-or-human.zip'
# zip_ref = zipfile.ZipFile(local_zip, 'r')
# zip_ref.extractall('validation-horse-or-human')
# zip_ref.close()

## Custom Dataset
We have to build our own Dataset class, bacuase we can't use an easy built-in dataset (such as MNIST etc).

In [3]:
class CustomDataset(Dataset):
    def __init__(self, folder_name: str, transform=None):
        self.imgs_path = folder_name
        self.transform = transform
        file_list = glob.glob(self.imgs_path + "*")
        self.data = []
        for class_path in file_list:
            class_name = class_path.split("/")[-1]
            for img_path in glob.glob(class_path + "/*.png"):
                self.data.append([img_path, class_name])
        self.class_map = {"horses" : 0, "humans": 1}
        self.img_dim = (128, 128)
    def __len__(self):
        return len(self.data)
    def __getitem__(self, idx):
        img_path, class_name = self.data[idx]
        img = cv2.imread(img_path)
        img = cv2.resize(img, self.img_dim)
        class_id = self.class_map[class_name]

        class_id = torch.tensor(class_id)
        if self.transform:
            img_tensor = self.transform(img)*255
        else:
            img_tensor = torch.from_numpy(img).permute(2,0,1)
        img_tensor = img_tensor.to(torch.float)
#         cv2.imshow('show',np.array(img_tensor.permute(1, 2, 0).cpu().detach().numpy(), dtype=np.uint8()))
#         cv2.waitKey(1000)
        return img_tensor, class_id

## Transformations
Data augmentation is a must when the dataset is as very small as this. We apply
- random horizontal flip
- random rotation
- random crop
- random hsv and contrast adjustment
- randomly sets to grayscale

This should provide more variety in the dataset.

In [4]:
transformation = transforms.Compose([
    transforms.ToPILImage(mode=None),
    transforms.RandomHorizontalFlip(),
    transforms.RandomApply(torch.nn.ModuleList([
        transforms.Grayscale(num_output_channels=3)
    ]), p=0.1),
    transforms.RandomRotation(20),
    transforms.RandomResizedCrop(size=(128, 128), scale=(0.5, 1.2), ratio=(0.9, 1.1)),
    transforms.ColorJitter(brightness=0.4, contrast=0.3, saturation=0.3, hue=0.1),
    transforms.ToTensor(),
])

dataset_train = CustomDataset("horse-or-human/", transform=transformation)
print('Train dataset size: ' + str(len(dataset_train)))
train_loader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)

dataset_test = CustomDataset("validation-horse-or-human/")
print('Test dataset size: ' + str(len(dataset_test)))
val_loader = DataLoader(dataset_test, batch_size=batch_size, shuffle=True)

Train dataset size: 1027
Test dataset size: 256


In [5]:
class ImageShape(NamedTuple):
    height: int
    width: int
    channels: int

## CNN
The network architecture is as follows:

Image (128,128,3) $\rightarrow$ Conv (32) $\rightarrow$ Pool (2,2) $\rightarrow$ Conv (64) $\rightarrow$ Pool (2,2) $\rightarrow$ Conv (64) $\rightarrow$ Pool (2,2) $\rightarrow$ FullyConnected (128) $\rightarrow$ FullyConnected (2).

The activation function is leaky rectified linear unit (leaky ReLU), there is batch normalisation to appease internal covariance shift, there is dropout for better generalisation.

In [6]:
class HorseNN(nn.Module):
    def __init__(self, height: int, width: int, channels: int, class_count: int):
        super(HorseNN, self).__init__()
        self.input_shape = ImageShape(height=height, width=width, channels=channels)
        self.class_count = class_count
        
        self.conv1 = nn.Conv2d(in_channels=self.input_shape.channels,out_channels=32,kernel_size=(5, 5),padding=(2, 2))
        self.initialise_layer(self.conv1)
        
        self.conv2 = nn.Conv2d(in_channels=32,out_channels=64,kernel_size=(5, 5),padding=(2, 2))
        self.initialise_layer(self.conv2)
        
        self.conv3 = nn.Conv2d(in_channels=64,out_channels=64,kernel_size=(5, 5),padding=(2, 2))
        self.initialise_layer(self.conv2)
        
        self.fc1 = nn.Linear(16*16*64, 128)
        self.initialise_layer(self.fc1)
        
        self.fc2 = nn.Linear(128, 2)
        self.initialise_layer(self.fc2)
        
        self.convolution = nn.Sequential(
            self.conv1,
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.02, True),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
            self.conv2,
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.02, True),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
            self.conv3,
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.02, True),
            nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        )
        self.fully_connected = nn.Sequential(
            nn.Dropout(0.6),
            self.fc1,
            nn.BatchNorm1d(128),
            nn.LeakyReLU(0.02, True),
            nn.Dropout(0.6),
            self.fc2
        )
        
    def forward(self, images: torch.Tensor) -> torch.Tensor:
        x = self.convolution(images)
        x = torch.flatten(x,start_dim=1)
        x = self.fully_connected(x)
        return x
        
        
    @staticmethod
    def initialise_layer(layer):
        if hasattr(layer, "bias"):
            nn.init.zeros_(layer.bias)
        if hasattr(layer, "weight"):
            nn.init.kaiming_normal_(layer.weight)

In [7]:
def compute_accuracy(
    labels: Union[torch.Tensor, np.ndarray], preds: Union[torch.Tensor, np.ndarray]
) -> float:
    """
    Args:
        labels: ``(batch_size, class_count)`` tensor or array containing example labels
        preds: ``(batch_size, class_count)`` tensor or array containing model prediction
    """
    assert len(labels) == len(preds)
    return float((labels == preds).sum()) / len(labels)


def get_summary_writer_log_dir(batch_size, learning_rate) -> str:
    return ""
    """Get a unique directory that hasn't been logged to before for use with a TB
    SummaryWriter.
    Args:
        args: CLI Arguments
    Returns:
        Subdirectory of log_dir with unique subdirectory name to prevent multiple runs
        from getting logged to the same TB log directory (which you can't easily
        untangle in TB).
    """
    tb_log_dir_prefix = f'CNN_bs={batch_size}_lr={learning_rate}_run_'
    i = 0
    while i < 1000:
        tb_log_dir = args.log_dir / (tb_log_dir_prefix + str(i))
        if not tb_log_dir.exists():
            return str(tb_log_dir)
        i += 1
    return str(tb_log_dir)


Check if gpu is available.

In [8]:
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

  return torch._C._cuda_getDeviceCount() > 0


## Training
Class Trainer is responsible for the training of the model, fetch data, forward pass, optimisation, back-progpagation etc.

In [9]:
class Trainer:
    def __init__(
        self,
        model: nn.Module,
        train_loader: DataLoader,
        val_loader: DataLoader,
        criterion: nn.Module,
        optimizer: Optimizer,
        summary_writer: SummaryWriter,
        device: torch.device,
        save_models: List[Tuple[nn.Module, float]]
    ):
        self.model = model.to(device)
        self.device = device
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.criterion = criterion
        self.optimizer = optimizer
        self.summary_writer = summary_writer
        self.step = 0
        self.save_models = save_models

    def train(
        self,
        epochs: int,
        val_frequency: int,
        print_frequency: int = 20,
        log_frequency: int = 5,
        start_epoch: int = 0
    ):
        self.model.train()
        for epoch in range(start_epoch, epochs):
            self.model.train()
            data_load_start_time = time.time()
            for batch, labels in self.train_loader:
                batch = batch.to(self.device)
                labels = labels.to(self.device)
                data_load_end_time = time.time()


                logits = self.model.forward(batch)

                loss = self.criterion(logits, labels)

                loss.backward()

                self.optimizer.step()
                self.optimizer.zero_grad()

                with torch.no_grad():
                    preds = logits.argmax(-1)
                    accuracy = compute_accuracy(labels, preds)

                data_load_time = data_load_end_time - data_load_start_time
                step_time = time.time() - data_load_end_time
                if ((self.step + 1) % log_frequency) == 0:
                    self.log_metrics(epoch, accuracy, loss, data_load_time, step_time)
                if ((self.step + 1) % print_frequency) == 0:
                    self.print_metrics(epoch, accuracy, loss, data_load_time, step_time)

                self.step += 1
                data_load_start_time = time.time()

            self.summary_writer.add_scalar("epoch", epoch, self.step)
            if ((epoch + 1) % val_frequency) == 0:
                self.validate()
                # self.validate() will put the model in validation mode,
                # so we have to switch back to train mode afterwards
                self.model.train()

    def print_metrics(self, epoch, accuracy, loss, data_load_time, step_time):
        epoch_step = self.step % len(self.train_loader)
        print(
                f"epoch: [{epoch}], "
                f"step: [{epoch_step}/{len(self.train_loader)}], "
                f"batch loss: {loss:.5f}, "
                f"batch accuracy: {accuracy * 100:2.2f}, "
                f"data load time: "
                f"{data_load_time:.5f}, "
                f"step time: {step_time:.5f}"
        )

    def log_metrics(self, epoch, accuracy, loss, data_load_time, step_time):
        self.summary_writer.add_scalar("epoch", epoch, self.step)
        self.summary_writer.add_scalars(
                "accuracy",
                {"train": accuracy},
                self.step
        )
        self.summary_writer.add_scalars(
                "loss",
                {"train": float(loss.item())},
                self.step
        )
        self.summary_writer.add_scalar(
                "time/data", data_load_time, self.step
        )
        self.summary_writer.add_scalar(
                "time/data", step_time, self.step
        )

    def validate(self):
        results = {"preds": [], "labels": []}
        total_loss = 0
        self.model.eval()

        # No need to track gradients for validation, we're not optimizing.
        with torch.no_grad():
            for batch, labels in self.val_loader:
                batch = batch.to(self.device)
                labels = labels.to(self.device)
                logits = self.model(batch)
                loss = self.criterion(logits, labels)
                total_loss += loss.item()
                preds = logits.argmax(dim=-1).cpu().numpy()
                results["preds"].extend(list(preds))
                results["labels"].extend(list(labels.cpu().numpy()))

        accuracy = compute_accuracy(
            np.array(results["labels"]), np.array(results["preds"])
        )
        average_loss = total_loss / len(self.val_loader)
        self.save_models.append((deepcopy(self.model.state_dict()), accuracy))

        self.summary_writer.add_scalars(
                "accuracy",
                {"test": accuracy},
                self.step
        )
        self.summary_writer.add_scalars(
                "loss",
                {"test": average_loss},
                self.step
        )
        print(f"validation loss: {average_loss:.5f}, accuracy: {accuracy * 100:2.2f}")


Running trainer.

In [16]:
model = HorseNN(128, 128, 3, 2)
criterion = nn.CrossEntropyLoss()
learning_rate = 5e-3
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
num_epochs = 8
save_models = []

log_dir = get_summary_writer_log_dir(batch_size, learning_rate)
print(f"Writing logs to {log_dir}")
summary_writer = SummaryWriter(
        str(log_dir),
        flush_secs=5
)
trainer = Trainer(
    model, train_loader, val_loader, criterion, optimizer, summary_writer, DEVICE, save_models
)

trainer.train(
    num_epochs,
    1,
    2,
    5,
)

summary_writer.close()

Writing logs to 
epoch: [0], step: [1/17], batch loss: 0.66488, batch accuracy: 68.75, data load time: 0.86164, step time: 2.31496
epoch: [0], step: [3/17], batch loss: 0.63468, batch accuracy: 73.44, data load time: 0.79027, step time: 2.37861
epoch: [0], step: [5/17], batch loss: 0.77927, batch accuracy: 67.19, data load time: 0.81880, step time: 2.44448
epoch: [0], step: [7/17], batch loss: 0.43584, batch accuracy: 81.25, data load time: 0.84438, step time: 2.44112
epoch: [0], step: [9/17], batch loss: 0.82480, batch accuracy: 64.06, data load time: 0.91859, step time: 2.36952
epoch: [0], step: [11/17], batch loss: 0.52750, batch accuracy: 81.25, data load time: 0.95038, step time: 2.93334
epoch: [0], step: [13/17], batch loss: 0.64953, batch accuracy: 70.31, data load time: 0.89654, step time: 2.59211
epoch: [0], step: [15/17], batch loss: 0.51164, batch accuracy: 75.00, data load time: 0.99659, step time: 2.74845
validation loss: 0.65131, accuracy: 89.84
epoch: [1], step: [0/17], 

## Choosing the model for classifier
The model is saved after each epoch with the validation accuracy. We choose the lowest accuracy model as our classifier and save it in a file. 

In [37]:
trainer.save_models.sort(key=lambda x: x[1], reverse=True)
trainer.model = HorseNN(128, 128, 3, 2)
trainer.model.load_state_dict(trainer.save_models[0][0])
trainer.validate() # can check which model is actually chosen
torch.save(trainer.save_models[0][0], 'my_horse_model.pth')

validation loss: 0.36068, accuracy: 90.62


# Check if you or your friends are horses :)
Or if your horse is a human.

In [76]:
softmax = nn.Softmax(dim=1)
classes = { 0: 'horse', 1: "human"}

mymodel = HorseNN(128, 128, 3, 2)
mymodel.load_state_dict(torch.load('my_horse_model.pth'))
mymodel.eval()

img = cv2.imread('white_horse.jpg')
if img.shape[0] < img.shape[1]:
    delta = img.shape[1] - img.shape[0]
    img = np.pad(img, ((delta // 2, delta // 2), (0, 0), (0, 0)))
else:
    delta = img.shape[0] - img.shape[1]
    img = np.pad(img, ((0, 0), (delta // 2, delta // 2), (0, 0)))
img = cv2.resize(img, (128, 128))
# cv2.imshow('window', img)
# cv2.waitKey(0)
img_tensor = torch.from_numpy(img).permute(2, 0, 1)
img_tensor = img_tensor.to(torch.float)

result = mymodel.forward(batch)

result = softmax(result)
print(result)
result = result[0]

print("It is %.4f percent you're a horse." % float(result[0]))

tensor([[0.9964, 0.0036]], grad_fn=<SoftmaxBackward>)
It is 0.9964 percent you're a horse.
