# Train a model to predict rotation of an image: 0°, 90°, 180° or 270°

In this simple example I will show how to train a simple model that predicts the rotation angle of an image. The accuracy can be very high, which makes the model an effective tool for cleaning the dataset.

In [None]:
import numpy as np
import pandas as pd
import os
import sys
from PIL import Image
import matplotlib.pyplot as plt
from datetime import datetime
from tqdm.notebook import tqdm

import multiprocessing as mp

import torch
import torch.nn as nn
import torchvision.transforms as T
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

sys.path.insert(1, "../input/timm-pytorch-image-models/pytorch-image-models-master/")

import timm

# Kaggle input locations of the competition data (train / test images and train.csv)
ROOT = '/kaggle/input/hotel-id-2021-fgvc8'
TRAIN_PATH = os.path.join(ROOT, "train_images")
TEST_PATH = os.path.join(ROOT, "test_images")
TRAIN_CSV = os.path.join(ROOT, "train.csv")

# Set it to True to first write a down-scaled copy of the training images,
# which speeds up the data loading and therefore the whole training
RESIZE_ALL_DATASET = False

# Run on the GPU when one is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# load train dataframe
df_hotels = pd.read_csv(TRAIN_CSV)

# The sub-sampling below is only here to keep this demo fast:
# remove it to work on the whole dataset
df_hotels = df_hotels.sample(1000)
print(f"len df_hotels = {len(df_hotels)}")

# 1. Resize the dataset (maximum width or height: 512)

Resizing the dataset will help us speed up the data loader and also the entire training.

In [None]:
if RESIZE_ALL_DATASET:
    import shutil
    import threading

    # Use multithreading to speed up the resize job
    def resize_thread(df, t_id):
        """Write every image listed in ``df`` to the "train_resized" folder.

        Images whose width or height exceeds 512 pixels are scaled down so that
        their longest side becomes 512. Smaller images are copied unchanged, so
        that "train_resized" is a complete replacement for TRAIN_PATH (the
        notebook points TRAIN_PATH to it once all the threads are done).

        Args:
            df: slice of the hotels dataframe with "chain" and "image" columns.
            t_id: index of the worker thread (not used by the function).
        """
        for _, row in tqdm(df.iterrows(), total=len(df)):
            chain_dir = str(row["chain"])
            file_path = os.path.join(TRAIN_PATH, chain_dir, row["image"])
            out_folder = os.path.join("train_resized", chain_dir)
            os.makedirs(out_folder, exist_ok=True)
            out_path = os.path.join(out_folder, row["image"])

            # The context manager releases the file handle of every source image
            with Image.open(file_path) as img:
                width, height = img.size
                max_size = max(width, height)
                needs_resize = max_size > 512
                if needs_resize:
                    scale = 512/max_size
                    # never let a very elongated image collapse to a 0-pixel side
                    new_size = (max(int(width*scale), 1), max(int(scale*height), 1))
                    img.resize(new_size).save(out_path)

            if not needs_resize:
                # already small enough: keep the original bytes (no re-encoding)
                shutil.copyfile(file_path, out_path)

    image_name = df_hotels["image"].values
    chain = df_hotels["chain"].values

    # create the new resized dataset dir
    os.makedirs("train_resized", exist_ok=True)

    n_tot = len(df_hotels)
    df_hotels.index = range(n_tot)

    # Start threads: one per CPU core, each working on a contiguous slice of rows
    num_thread = mp.cpu_count()
    num_elems = n_tot//num_thread
    thread_list = []
    for i in range(num_thread):
        i1 = i*num_elems
        # the last thread also takes the remainder of the integer division
        i2 = n_tot if i == num_thread-1 else (i+1)*num_elems
        thread_list.append(
            threading.Thread(target=resize_thread, args=(df_hotels.iloc[i1:i2], i))
        )

    for thread in thread_list:
        thread.start()

    # Wait for every slice to be written before switching to the resized folder
    for thread in thread_list:
        thread.join()

    TRAIN_PATH = "train_resized"

## 1.1 Compute the aspect ratio of images

In [None]:
image_name = df_hotels["image"].values
chain = df_hotels["chain"].values
# Add the image width, height and aspect ratio (width / height) to the dataframe
width = []
height = []
for chain_id, file_name in tqdm(zip(chain, image_name), total=len(image_name)):
    # Close each file as soon as its size has been read, instead of leaving
    # one open handle per image behind
    with Image.open(os.path.join(TRAIN_PATH, str(chain_id), file_name)) as img:
        w, h = img.size
    width.append(w)
    height.append(h)
width = np.array(width)
height = np.array(height)

df_hotels["width"] = width
df_hotels["height"] = height
df_hotels["ar"] = width/height
df_hotels.head()

## 1.2 Check images with different aspect ratio to check if rotations happen more frequently on a specific AR

In [None]:
def show_ar_images(index):
    """Plot a 5x10 grid of images drawn at random from the rows selected by ``index``.

    ``index`` is whatever ``df_hotels[...]`` accepts as a row selector (the
    notebook passes a boolean mask built on the "ar" column). One row is
    sampled independently for every cell of the grid, so the same image can
    appear more than once.
    """
    n_rows, n_cols = 5, 10
    fig, ax = plt.subplots(n_rows, n_cols, figsize=(30,15))
    for cell in range(n_rows * n_cols):
        row, col = divmod(cell, n_cols)
        picked = df_hotels[index].sample(1)
        chain_dir = str(picked["chain"].item())
        file_name = picked["image"].item()
        picture = Image.open(os.path.join(TRAIN_PATH, chain_dir, file_name))
        ax[row, col].imshow(picture)

In [None]:
# Random sample of the images whose aspect ratio (width / height) is at most 0.75
show_ar_images(df_hotels["ar"]<=0.75)

In [None]:
# Random sample of the images whose aspect ratio (width / height) is above 0.75
show_ar_images(df_hotels["ar"]>0.75)

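It seems that rotated images are more frequent when the aspect ratio is greater than 0.75, so we can train our network using only the images with ar <= 0.75: they are mostly upright, so they can be treated as clean 0° samples that we then rotate ourselves.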

# 2. Training

In [None]:
import sklearn.metrics as metrics
# Hyperparameters
# there are 4 possible rotations: [0, 90, 180, 270] degrees
num_classes = 4
# number of images read per batch; the dataset returns several rotated copies
# of each image, so the model receives more than batch_size tensors per step
batch_size = 64
# number of rotations to sample from [0, 90, 180, 270] during training
num_rotations = 2
# number of workers for the data loader (the CPU count reported by multiprocessing)
num_workers = mp.cpu_count()
# number of epochs (kept low for this demo: change it to 40 for a real training)
epochs = 10
# learning rate
lr = 5e-4
# label smoothing epsilon
smooth_eps = 0.15


## 2.1 Splitting
First of all, split the dataset into a training set and a validation set.

In [None]:
# Random split of the images with ar <= 0.75: 85 % for training, 15 % for validation
df_hotels_ar = df_hotels.loc[df_hotels["ar"] <= 0.75]
n_sample = len(df_hotels_ar)
n_train = int(0.85 * n_sample)
# Shuffle the row positions in place, then cut them at the 85 % mark
indexes = np.arange(n_sample)
np.random.shuffle(indexes)
train_rows, val_rows = indexes[:n_train], indexes[n_train:]
df_train = df_hotels_ar.iloc[train_rows]
df_val = df_hotels_ar.iloc[val_rows]

## 2.2 Define the model

In [None]:
# Build an EfficientNet-B0 with timm and initialise it from the local checkpoint
# file given in checkpoint_path (pretrained=False is passed alongside it).
# Presumably these are the ImageNet weights, judging by the original comment
# and the file name - TODO confirm
model = timm.create_model("efficientnet_b0", pretrained=False, checkpoint_path="../input/timm-pretrained-efficientnet/efficientnet/efficientnet_b0_ra-3dd342df.pth",drop_rate=0.4)
# Change the number of classes: replace the classifier with a new linear layer
# that has num_classes outputs (its input width is read from model.bn2)
model.classifier = nn.Linear(model.bn2.num_features, num_classes)
# get the default config
config = timm.data.resolve_data_config({}, model=model)
# get the default input size for the defined model: the first entry of
# config["input_size"] is dropped (presumably the channel count, which would
# leave (height, width) - TODO confirm)
input_size = config["input_size"][1:]

## 2.3 Create datasets, preprocessing and dataloader

In [None]:
class RotationDataset(datasets.vision.VisionDataset):
    """Dataset that returns rotated copies of an image, labelled by the rotation.

    Label ``k`` (0, 1, 2 or 3) means that the image has been rotated by
    ``k * 90`` degrees counter-clockwise with ``torch.rot90``, i.e. the four
    classes are 0, 90, 180 and 270 degrees. Every item is a pair
    ``(samples, labels)`` of two lists of the same length: the rotated tensors
    and the matching integer labels.

    Args:
        root: folder that contains one sub-folder per chain.
        df_hotels: dataframe with the "chain" and "image" columns; the image
            path is ``root/<chain>/<image>``.
        num_rotations: how many rotations are drawn at random (with
            replacement) for each image when ``validation`` is False.
        validation: when True all four rotations are returned, in label order.
        transform: transform applied to the loaded image before rotating it.
            It must produce a (C, H, W) tensor; the images must be square if
            the rotated copies are to be batched together.
        loader: callable that loads an image from its path.
    """

    # the rotation classes: label k <-> k * 90 degrees
    NUM_ROTATION_CLASSES = 4

    def __init__(
        self,
        root,
        df_hotels,        # The dataframe
        num_rotations=2,  # how many rotations of the same image are chosen randomly from [0, 90, 180, 270]
        validation=False, # When validating, we want to evaluate all the rotations
        transform=None,
        loader=datasets.folder.default_loader,
    ):
        super().__init__(root, transform=transform, target_transform=None)
        self.loader = loader
        self.validation = validation
        self.num_rotations = num_rotations

        # Full path of every image: root/<chain>/<image>
        self.samples = np.array(
            [
                os.path.join(root, str(chain_id), file_name)
                for chain_id, file_name in zip(
                    df_hotels["chain"].values, df_hotels["image"].values
                )
            ]
        )

    def __getitem__(self, index):
        sample = self.loader(self.samples[index])

        if self.transform is not None:
            sample = self.transform(sample)

        if self.validation:
            labels = list(range(self.NUM_ROTATION_CLASSES))
        else:
            # Draw the rotations from torch's RNG: it is seeded separately in
            # every DataLoader worker, so the rotations differ between workers
            # and between epochs, and the global NumPy RNG is left untouched.
            labels = torch.randint(
                0, self.NUM_ROTATION_CLASSES, (self.num_rotations,)
            ).tolist()

        # True rotations in the (H, W) plane; dim 0 is the channel dimension
        samples = [torch.rot90(sample, k=label, dims=(1, 2)) for label in labels]

        return samples, labels

    def __len__(self):
        return len(self.samples)

Define dataloader and dataset for training

In [None]:
# Per-channel mean / std normalisation, shared by the two pipelines
_normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training pipeline: light geometric and colour augmentation, then tensor + normalisation
train_transform = T.Compose(
    [
        T.RandomRotation(15),
        T.RandomResizedCrop(input_size, scale=(0.5,1.0)),
        T.RandomHorizontalFlip(0.5),
        T.ColorJitter(0.15, 0.15, 0.15, 0.1),
        T.RandomGrayscale(0.2),
        T.ToTensor(),
        _normalize,
    ]
)
# Validation pipeline: resize only, no augmentation
val_transform = T.Compose([T.Resize(input_size), T.ToTensor(), _normalize])

# Training items carry num_rotations random rotations, validation items all four
train_dataset = RotationDataset(
    TRAIN_PATH, df_train, num_rotations=num_rotations, transform=train_transform
)
val_dataset = RotationDataset(
    TRAIN_PATH, df_val, validation=True, transform=val_transform
)

# Data loaders: identical settings, only the training one is shuffled
_loader_options = dict(batch_size=batch_size, num_workers=num_workers, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)

## 2.4 Define optimizer and schedulers

In [None]:
# Label smoothing cross entropy loss
class LabelSmoothingCrossEntropy(nn.Module):
    """Cross entropy against a label-smoothed target distribution.

    The target class receives the probability ``1 - epsilon``; the remaining
    ``epsilon`` is shared equally between the other ``classes - 1`` classes.

    Args:
        classes: number of classes, at least 2.
        epsilon: total probability mass moved away from the target class.
        dim: dimension of ``pred`` that indexes the classes.

    Raises:
        ValueError: if ``classes`` is smaller than 2 (the smoothing mass could
            not be distributed and the loss would divide by zero).
    """

    def __init__(self, classes, epsilon=0.2, dim=-1):
        super().__init__()
        if classes < 2:
            raise ValueError(f"classes must be at least 2, got {classes}")
        self.confidence = 1.0 - epsilon
        self.epsilon = epsilon
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        """Return the mean smoothed cross entropy.

        Args:
            pred: raw logits, with the classes along ``self.dim``.
            target: integer class indices, shaped like ``pred`` without the
                class dimension.
        """
        log_probs = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            # The smoothed target is a constant: no gradient flows through it
            true_dist = torch.full_like(log_probs, self.epsilon / (self.cls - 1))
            # Scatter along the class dimension itself (not a hard-coded dim 1),
            # so that the loss also works when pred is not laid out as (N, C)
            true_dist.scatter_(self.dim, target.unsqueeze(self.dim), self.confidence)
        return torch.mean(torch.sum(-true_dist * log_probs, dim=self.dim))

In [None]:
# Adam over all the model parameters, starting from the learning rate lr
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
# you can also choose SGD
#optimizer = torch.optim.SGD(model.parameters(), lr=lr) 

# Cosine annealing schedule: T_max is the number of epochs, eta_min the lowest learning rate
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=epochs, eta_min=1e-5
)
# Cross entropy with label smoothing criterion (it helps to calibrate the probabilities)
criterion = LabelSmoothingCrossEntropy(
    classes=num_classes,
    epsilon=smooth_eps,
)

## 2.5 Train!

In [None]:
# Eval function
def eval(model, data_loader, device="cuda:0"):
    """Evaluate ``model`` on ``data_loader`` and print / return the metrics.

    NOTE: the name shadows the built-in ``eval``; it is kept because the
    training function calls the evaluation under this name.

    Args:
        model: the network to evaluate; it is switched to eval mode and moved
            to ``device``.
        data_loader: yields ``(images, labels)`` pairs of lists of tensors,
            which are concatenated into a single batch.
        device: device on which the forward pass runs.

    Returns:
        dict with the keys "accuracy", "macro_precision", "macro_recall" and
        "macro_f1".
    """
    pred_list = []
    label_list = []
    model.eval()
    model.to(device)
    with torch.no_grad():
        for images, labels in data_loader:
            images = torch.cat(images).to(device)
            # the labels are only compared on the host: no device transfer needed
            labels = torch.cat(labels)

            # run on the requested device (a hard-coded .cuda() fails on CPU-only machines)
            logits = model(images)
            pred_list.extend(logits.argmax(dim=1).cpu().tolist())
            label_list.extend(labels.tolist())

    metric_dict = {
        "accuracy": metrics.accuracy_score(label_list, pred_list),
        "macro_precision": metrics.precision_score(
            label_list, pred_list, average="macro"
        ),
        "macro_recall": metrics.recall_score(label_list, pred_list, average="macro"),
        "macro_f1": metrics.f1_score(label_list, pred_list, average="macro"),
    }

    print(
        f'Val acc. {metric_dict["accuracy"]*100} %, meanPrecision {metric_dict["macro_precision"]}, meanRecall {metric_dict["macro_recall"]}, meanF1 {metric_dict["macro_f1"]}'
    )

    return metric_dict

In [None]:
# Train function
def train(
    model,
    optimizer,
    scheduler,
    criterion,
    train_data_loader,
    val_data_loader,
    epochs=50,
    batch_size=32,
    log_freq=0.25,
    save_every=10,
    device="cuda:0",
):
    """Train ``model``, evaluating it after every epoch and saving checkpoints.

    Args:
        model: the network to train; it is moved to ``device``.
        optimizer: optimizer stepped after every batch.
        scheduler: learning-rate scheduler stepped once per epoch.
        criterion: loss called as ``criterion(logits, labels)``.
        train_data_loader: yields ``(images, labels)`` pairs of lists of
            tensors, which are concatenated into a single batch.
        val_data_loader: loader passed to ``eval`` at the end of every epoch.
        epochs: number of epochs.
        batch_size: kept for backward compatibility; the number of batches is
            read from ``len(train_data_loader)``, which also counts a final
            partial batch.
        log_freq: fraction of an epoch between two printed training losses.
        save_every: a checkpoint is written every ``save_every`` epochs, and
            also after the last epoch so that the final weights are never lost.
        device: device used for training and evaluation.
    """
    date_now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    checkpoint_dir = os.path.join("checkpoints", date_now)

    num_batches = len(train_data_loader)
    log_step = max(int(log_freq * num_batches), 1)

    model.to(device)
    for epoch in range(1, epochs + 1):
        # eval() leaves the model in evaluation mode: switch back every epoch
        model.train()
        for batch, (images, labels) in enumerate(train_data_loader):
            optimizer.zero_grad()

            images = torch.cat(images).to(device)
            labels = torch.cat(labels).to(device)

            logits = model(images)

            loss = criterion(logits, labels)
            loss.backward()

            optimizer.step()

            if batch % log_step == 0:
                print(
                    f"[{batch}/{num_batches}] Epoch {epoch} : Train loss {loss.item()}"
                )

        scheduler.step()

        metric_dict = eval(model, val_data_loader, device=device)

        # Save checkpoints
        if epoch % save_every == 0 or epoch == epochs:
            pth_name = (
                f'{epoch:03d}_{100*metric_dict["accuracy"]:.4f}.pth'
            )
            print(f"Saving {pth_name}")

            os.makedirs(checkpoint_dir, exist_ok=True)
            torch.save(model.state_dict(), os.path.join(checkpoint_dir, pth_name))

In [None]:
    # Launch the training with the model, optimizer, scheduler, criterion and
    # data loaders of this notebook; log_freq and save_every keep the defaults
    # of train().
    # NOTE(review): this call is indented although it is a top-level statement.
    # IPython presumably strips the leading indentation of a cell, but a plain
    # .py export would raise an IndentationError here - confirm and dedent.
    train(
        model,
        optimizer,
        scheduler,
        criterion,
        train_loader,
        val_loader,
        epochs=epochs,
        batch_size=batch_size,
        device=device,
    )