In [1]:
!git clone https://github.com/Wukkkinz-0725/animalImage_classification.git

Cloning into 'animalImage_classification'...
remote: Enumerating objects: 18607, done.[K
remote: Counting objects: 100% (18607/18607), done.[K
remote: Compressing objects: 100% (56/56), done.[K
remote: Total 18607 (delta 18561), reused 18581 (delta 18549), pack-reused 0[K
Receiving objects: 100% (18607/18607), 13.80 MiB | 23.90 MiB/s, done.
Resolving deltas: 100% (18561/18561), done.


In [2]:
!pip install -q efficientnet_pytorch

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for efficientnet_pytorch (setup.py) ... [?25l[?25hdone


In [3]:
import os
import random
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

from torch.utils.data import Dataset, DataLoader, BatchSampler, random_split
from torchvision import transforms
import torchvision.models as models
from PIL import Image
from sklearn.model_selection import StratifiedShuffleSplit

In [4]:
os.chdir('./animalImage_classification/Released_Data')

## Baseline

In [5]:
# Create Dataset class for multilabel classification
class MultiClassImageDataset(Dataset):
    def __init__(self, ann_df, super_map_df, sub_map_df, img_dir, transform=None):
        self.ann_df = ann_df
        self.super_map_df = super_map_df
        self.sub_map_df = sub_map_df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.ann_df)

    def __getitem__(self, idx):
        img_name = self.ann_df['image'][idx]
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        super_idx = self.ann_df['superclass_index'][idx]
        super_label = self.super_map_df['class'][super_idx]

        sub_idx = self.ann_df['subclass_index'][idx]
        sub_label = self.sub_map_df['class'][sub_idx]

        if self.transform:
            image = self.transform(image)

        return image, super_idx, super_label, sub_idx, sub_label

class MultiClassImageTestDataset(Dataset):
    def __init__(self, super_map_df, sub_map_df, img_dir, transform=None):
        self.super_map_df = super_map_df
        self.sub_map_df = sub_map_df
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self): # Count files in img_dir
        return len([fname for fname in os.listdir(self.img_dir)])

    def __getitem__(self, idx):
        img_name = str(idx) + '.jpg'
        img_path = os.path.join(self.img_dir, img_name)
        image = Image.open(img_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, img_name

In [6]:
def stratified_split(dataset, stratify_by='superclass_index'):
    from torch.utils.data import Subset
    # Extract labels for stratification
    labels = np.array(dataset.ann_df[stratify_by])

    # Perform stratified split
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.1, random_state=10)
    train_idx, val_idx = next(sss.split(np.zeros(len(labels)), labels))

    # Create train and validation subsets
    train_subset = Subset(dataset, train_idx)
    val_subset = Subset(dataset, val_idx)

    return train_subset, val_subset

In [166]:
# Load CIFAR-10 dataset
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from PIL import Image
import os
import pandas as pd

transform = transforms.Compose([transforms.ToTensor()])
cifar10_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

# Filter images evenly across remaining 7 classes
class_limit = 1000 // 7  # Approximate number of images per class
class_counts = {label: 0 for label in range(10) if label not in [2, 5, 6]}
filtered_images = []

for image, label in cifar10_dataset:
    if label in class_counts and class_counts[label] < class_limit:
        filtered_images.append((image, label))
        class_counts[label] += 1
    if all(count == class_limit for count in class_counts.values()):
        break

Files already downloaded and verified


In [167]:
# Read existing data from CSV file or create a new DataFrame if the file doesn't exist
csv_file = 'train_data.csv'
new_csv_file = 'train_data_v1.csv'
existing_df = pd.read_csv(csv_file)
DROP_IDX = [534, 589, 1013, 1231, 1274, 1501, 1827, 1922, 2191, 2195, 2197, 2548, 2575, 2578,
2690, 3049, 3099, 3100, 3292, 3481, 3702, 3743, 4099, 4565, 4850, 4914, 5039, 5150,
5222, 5350, 5557, 5726, 6037, 6262]
existing_df.drop(DROP_IDX, axis=0)
# Prepare CSV data for new images
new_csv_data = {'image': [], 'superclass_index': [], 'subclass_index': []}
for i, (image, _) in enumerate(filtered_images, start=6322):
    file_name = f'{i}.jpg'
    image_path = os.path.join('train_shuffle', file_name)
    image = transforms.ToPILImage()(image)
    image.save(image_path)

    # Update CSV data
    new_csv_data['image'].append(file_name)
    new_csv_data['superclass_index'].append(3)
    new_csv_data['subclass_index'].append(87)

new_df = pd.DataFrame(new_csv_data)
combined_df = existing_df.append(new_df, ignore_index=True)

# Write the combined DataFrame to the CSV file
combined_df.to_csv(new_csv_file, index=False)

  combined_df = existing_df.append(new_df, ignore_index=True)


In [168]:
train_ann_df = pd.read_csv('train_data_v1.csv')
super_map_df = pd.read_csv('superclass_mapping.csv')
sub_map_df = pd.read_csv('subclass_mapping.csv')

train_img_dir = 'train_shuffle'
test_img_dir = 'test_shuffle'

image_preprocessing = transforms.Compose([
    transforms.RandomRotation(10),      # rotate +/- 10 degrees
    transforms.RandomHorizontalFlip(),  # reverse 50% of images
    transforms.RandomVerticalFlip(),    # vertical flip of the image
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.02),  # random color jitter
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # random translation
    transforms.RandomResizedCrop(size=(32, 32), scale=(0.8, 1.0)),  # random crop and resize
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # normalization
])

# Create train and val split
train_dataset = MultiClassImageDataset(train_ann_df, super_map_df, sub_map_df, train_img_dir, transform=image_preprocessing)
train_dataset, val_dataset = stratified_split(train_dataset)

# Create test dataset
test_dataset = MultiClassImageTestDataset(super_map_df, sub_map_df, test_img_dir, transform=image_preprocessing)

# Create dataloaders
batch_size = 64
train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True)

val_loader = DataLoader(val_dataset,
                        batch_size=batch_size,
                        shuffle=True)

test_loader = DataLoader(test_dataset,
                         batch_size=1,
                         shuffle=False)

In [10]:
def show_image(image_path):
    image = Image.open(image_path).convert('RGB')  # Ensure it's read in RGB format
    return image

# Trainers

In [11]:
# Simple CNN
class CNN(nn.Module):
    def __init__(self):
        super().__init__()

        self.block1 = nn.Sequential(
                        nn.Conv2d(3, 32, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(32),
                        nn.Conv2d(32, 32, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(32),
                        nn.Conv2d(32, 32, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(32),
                        nn.MaxPool2d(2, 2)
                      )

        self.block2 = nn.Sequential(
                        nn.Conv2d(32, 64, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(64),
                        nn.Conv2d(64, 64, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(64),
                        nn.Conv2d(64, 64, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(64),
                        nn.MaxPool2d(2, 2)
                      )

        self.block3 = nn.Sequential(
                        nn.Conv2d(64, 128, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(128),
                        nn.Conv2d(128, 128, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(128),
                        nn.Conv2d(128, 128, 3, padding='same'),
                        nn.ReLU(),
                        nn.BatchNorm2d(128),
                        nn.MaxPool2d(2, 2)
                      )

        self.fc1 = nn.Linear(4*4*128, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3a = nn.Linear(128, 4)
        self.fc3b = nn.Linear(128, 88)

    def forward(self, x):
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = torch.flatten(x, 1) # flatten all dimensions except batch

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        super_out = self.fc3a(x)
        sub_out = self.fc3b(x)
        return super_out, sub_out

class Trainer():
    def __init__(self, model, criterion, optimizer, train_loader, val_loader, test_loader=None, device='cuda'):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.device = device

    def train_epoch(self):
        running_loss = 0.0
        self.model.train()
        for i, data in enumerate(self.train_loader):
            inputs, super_labels, sub_labels = data[0].to(self.device), data[1].to(self.device), data[3].to(self.device)

            self.optimizer.zero_grad()
            super_outputs, sub_outputs = self.model(inputs)
            loss = 0
            loss += self.criterion(super_outputs, super_labels)
            loss += self.criterion(sub_outputs, sub_labels)
            loss.backward()
            self.optimizer.step()

            running_loss += loss.item()

        print(f'Training loss: {running_loss / len(self.train_loader):.3f}')

    def validate_epoch(self):
        super_correct, sub_correct, total = 0, 0, 0
        running_loss = 0.0
        self.model.eval()
        with torch.no_grad():
            for i, data in enumerate(self.val_loader):
                inputs, super_labels, sub_labels = data[0].to(self.device), data[1].to(self.device), data[3].to(self.device)

                super_outputs, sub_outputs = self.model(inputs)
                loss = 0
                loss += self.criterion(super_outputs, super_labels)
                _, super_predicted = torch.max(super_outputs.data, 1)
                super_correct += (super_predicted == super_labels).sum().item()
                loss += self.criterion(sub_outputs, sub_labels)
                _, sub_predicted = torch.max(sub_outputs.data, 1)
                sub_correct += (sub_predicted == sub_labels).sum().item()
                total += super_labels.size(0)
                running_loss += loss.item()

        print(f'Validation Loss: {running_loss / len(self.val_loader):.3f}')
        print(f'Validation Superclass Accuracy: {100 * super_correct / total:.2f} %')
        print(f'Validation Subclass Accuracy: {100 * sub_correct / total:.2f} %')

    def test(self, save_to_csv=False, threshold_super=0.95, threshold_sub=0.95, return_predictions=False):
        self.model.eval()
        if not self.test_loader:
            raise NotImplementedError('test_loader not specified')

        superclass_predictions = {'image': [], 'superclass_index': [], 'superclass_probs': []}
        subclass_predictions = {'image': [], 'subclass_index': [], 'subclass_probs': []}

        with torch.no_grad():
            for i, data in enumerate(self.test_loader):
                inputs, img_name = data[0].to(self.device), data[1]

                super_outputs, sub_outputs = self.model(inputs)

                super_probs = F.softmax(super_outputs, dim=1)
                sub_probs = F.softmax(sub_outputs, dim=1)
                _, super_predicted = torch.max(super_probs, 1)
                _, sub_predicted = torch.max(sub_probs, 1)

                super_predicted[torch.max(super_probs, 1).values < threshold_super] = 3
                sub_predicted[torch.max(sub_probs, 1).values < threshold_sub] = 87

                superclass_predictions['superclass_index'].append(super_predicted.item())
                superclass_predictions['superclass_probs'].append(super_probs.cpu().numpy())
                superclass_predictions['image'].append(img_name[0])

                subclass_predictions['subclass_index'].append(sub_predicted.item())
                subclass_predictions['subclass_probs'].append(sub_probs.cpu().numpy())
                subclass_predictions['image'].append(img_name[0])

        if save_to_csv:
            superclass_df = pd.DataFrame(data=superclass_predictions)
            superclass_df.to_csv('superclass_prediction.csv', index=False)

            subclass_df = pd.DataFrame(data=subclass_predictions)
            subclass_df.to_csv('subclass_prediction.csv', index=False)

        if return_predictions:
            return superclass_predictions, subclass_predictions


In [132]:
class AutoEncoderLossTrainer():
    def __init__(self, model, criterion, optimizer, train_loader, val_loader, test_loader=None, device='cuda'):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.device = device

    def custom_loss(self, super_outputs, super_labels, sub_outputs, sub_labels, decoded, original, alpha=0.5):
        classification_loss = nn.CrossEntropyLoss()
        reconstruction_loss = nn.MSELoss()

        loss_super = classification_loss(super_outputs, super_labels)
        loss_sub = classification_loss(sub_outputs, sub_labels)
        loss_recon = reconstruction_loss(decoded, original)

        return alpha * (loss_super + loss_sub) + (1 - alpha) * loss_recon

    def train_epoch(self):
        running_loss = 0.0
        self.model.train()
        for i, data in enumerate(self.train_loader):
            inputs, super_labels, sub_labels = data[0].to(self.device), data[1].to(self.device), data[3].to(self.device)

            self.optimizer.zero_grad()
            super_outputs, sub_outputs, decoded = self.model(inputs)
            loss = self.custom_loss(super_outputs, super_labels, sub_outputs, sub_labels, decoded, inputs)
            loss.backward()
            self.optimizer.step()

            running_loss += loss.item()

        print(f'Training loss: {running_loss / len(self.train_loader):.3f}')

    def validate_epoch(self):
        super_correct, sub_correct, total = 0, 0, 0
        running_loss = 0.0
        self.model.eval()
        with torch.no_grad():
            for i, data in enumerate(self.val_loader):
                inputs, super_labels, sub_labels = data[0].to(self.device), data[1].to(self.device), data[3].to(self.device)

                super_outputs, sub_outputs, decoded = self.model(inputs)
                _, super_predicted = torch.max(super_outputs.data, 1)
                super_correct += (super_predicted == super_labels).sum().item()
                _, sub_predicted = torch.max(sub_outputs.data, 1)
                sub_correct += (sub_predicted == sub_labels).sum().item()
                total += super_labels.size(0)
                loss = self.custom_loss(super_outputs, super_labels, sub_outputs, sub_labels, decoded, inputs)
                running_loss += loss.item()

        print(f'Validation Loss: {running_loss / len(self.val_loader):.3f}')
        print(f'Validation Superclass Accuracy: {100 * super_correct / total:.2f} %')
        print(f'Validation Subclass Accuracy: {100 * sub_correct / total:.2f} %')

    def test(self, save_to_csv=False, threshold_super=0.95, threshold_sub=0.95, return_predictions=False):
        self.model.eval()
        if not self.test_loader:
            raise NotImplementedError('test_loader not specified')

        superclass_predictions = {'image': [], 'superclass_index': [], 'superclass_probs': []}
        subclass_predictions = {'image': [], 'subclass_index': [], 'subclass_probs': []}

        with torch.no_grad():
            for i, data in enumerate(self.test_loader):
                inputs, img_name = data[0].to(self.device), data[1]

                super_outputs, sub_outputs = self.model(inputs)

                super_probs = F.softmax(super_outputs, dim=1)
                sub_probs = F.softmax(sub_outputs, dim=1)
                _, super_predicted = torch.max(super_probs, 1)
                _, sub_predicted = torch.max(sub_probs, 1)

                super_predicted[torch.max(super_probs, 1).values < threshold_super] = 3
                sub_predicted[torch.max(sub_probs, 1).values < threshold_sub] = 87

                superclass_predictions['superclass_index'].append(super_predicted.item())
                superclass_predictions['superclass_probs'].append(super_probs.cpu().numpy())
                superclass_predictions['image'].append(img_name[0])

                subclass_predictions['subclass_index'].append(sub_predicted.item())
                subclass_predictions['subclass_probs'].append(sub_probs.cpu().numpy())
                subclass_predictions['image'].append(img_name[0])

        if save_to_csv:
            superclass_df = pd.DataFrame(data=superclass_predictions)
            superclass_df.to_csv('superclass_prediction_mobilenet_autoencoder.csv', index=False)

            subclass_df = pd.DataFrame(data=subclass_predictions)
            subclass_df.to_csv('subclass_prediction_mobilenet_autoencoder.csv', index=False)

        if return_predictions:
            return superclass_predictions, subclass_predictions

In [22]:
class BCELossTrainer():
    def __init__(self, model, criterion, optimizer, train_loader, val_loader, test_loader=None, device='cuda'):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.device = device

    def custom_loss(self, super_class_output, super_labels, sub_class_output, sub_labels):
        bce_loss = nn.BCEWithLogitsLoss()
        super_class_loss = bce_loss(super_class_output, F.one_hot(super_labels, num_classes=4).float())
        sub_class_loss = bce_loss(sub_class_output, F.one_hot(sub_labels, num_classes=88).float())
        return super_class_loss + sub_class_loss

    def train_epoch(self):
        running_loss = 0.0
        self.model.train()
        for i, data in enumerate(self.train_loader):
            inputs, super_labels, sub_labels = data[0].to(self.device), data[1].to(self.device), data[3].to(self.device)

            self.optimizer.zero_grad()
            super_outputs, sub_outputs = self.model(inputs)
            loss = self.custom_loss(super_outputs, super_labels, sub_outputs, sub_labels)
            loss.backward()
            self.optimizer.step()

            running_loss += loss.item()

        print(f'Training loss: {running_loss / len(self.train_loader):.3f}')

    def validate_epoch(self):
        super_correct, sub_correct, total = 0, 0, 0
        running_loss = 0.0
        self.model.eval()
        with torch.no_grad():
            for i, data in enumerate(self.val_loader):
                inputs, super_labels, sub_labels = data[0].to(self.device), data[1].to(self.device), data[3].to(self.device)

                super_outputs, sub_outputs = self.model(inputs)
                _, super_predicted = torch.max(super_outputs.data, 1)
                super_correct += (super_predicted == super_labels).sum().item()
                _, sub_predicted = torch.max(sub_outputs.data, 1)
                sub_correct += (sub_predicted == sub_labels).sum().item()
                total += super_labels.size(0)
                loss = self.custom_loss(super_outputs, super_labels, sub_outputs, sub_labels)
                running_loss += loss.item()

        print(f'Validation Loss: {running_loss / len(self.val_loader):.3f}')
        print(f'Validation Superclass Accuracy: {100 * super_correct / total:.2f} %')
        print(f'Validation Subclass Accuracy: {100 * sub_correct / total:.2f} %')

    def test(self, save_to_csv=False, threshold_super=0.95, threshold_sub=0.95, return_predictions=False):
        self.model.eval()
        if not self.test_loader:
            raise NotImplementedError('test_loader not specified')

        superclass_predictions = {'image': [], 'superclass_index': [], 'superclass_probs': []}
        subclass_predictions = {'image': [], 'subclass_index': [], 'subclass_probs': []}

        with torch.no_grad():
            for i, data in enumerate(self.test_loader):
                inputs, img_name = data[0].to(self.device), data[1]

                super_outputs, sub_outputs = self.model(inputs)

                super_probs = F.softmax(super_outputs, dim=1)
                sub_probs = F.softmax(sub_outputs, dim=1)
                _, super_predicted = torch.max(super_probs, 1)
                _, sub_predicted = torch.max(sub_probs, 1)

                super_predicted[torch.max(super_probs, 1).values < threshold_super] = 3
                sub_predicted[torch.max(sub_probs, 1).values < threshold_sub] = 87

                superclass_predictions['superclass_index'].append(super_predicted.item())
                superclass_predictions['superclass_probs'].append(super_probs.cpu().numpy())
                superclass_predictions['image'].append(img_name[0])

                subclass_predictions['subclass_index'].append(sub_predicted.item())
                subclass_predictions['subclass_probs'].append(sub_probs.cpu().numpy())
                subclass_predictions['image'].append(img_name[0])

        if save_to_csv:
            superclass_df = pd.DataFrame(data=superclass_predictions)
            superclass_df.to_csv('superclass_prediction.csv', index=False)

            subclass_df = pd.DataFrame(data=subclass_predictions)
            subclass_df.to_csv('subclass_prediction.csv', index=False)

        if return_predictions:
            return superclass_predictions, subclass_predictions

# Models

In [165]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from efficientnet_pytorch import EfficientNet

class CustomEfficientNet(nn.Module):
    def __init__(self, base_model, num_super_classes=4, num_sub_classes=88):
        super(CustomEfficientNet, self).__init__()
        self.base_model = base_model

        in_features = self.base_model._fc.in_features

        self.super_class_classifier = nn.Linear(in_features, num_super_classes)
        self.sub_class_classifier = nn.Linear(in_features, num_sub_classes)

    def forward(self, x):
        features = self.base_model.extract_features(x)

        pooled_features = F.adaptive_avg_pool2d(features, 1).squeeze(-1).squeeze(-1)

        super_class_output = self.super_class_classifier(pooled_features)
        sub_class_output = self.sub_class_classifier(pooled_features)

        return super_class_output, sub_class_output


In [15]:
class MobileNetAutoencoder(nn.Module):
    def __init__(self, num_super_classes=4, num_sub_classes=88):
        super(MobileNetAutoencoder, self).__init__()
        self.mobilenet_features = models.mobilenet_v2(pretrained=True).features

        self.encoder = nn.Sequential(
            self.mobilenet_features,
            nn.AdaptiveAvgPool2d((1, 1))
        )

        self.decoder = nn.Sequential(
            nn.Linear(1280, 512),
            nn.ReLU(),
            nn.Linear(512, 1024),
            nn.ReLU(),
            nn.Linear(1024, 32 * 32 * 3),
            nn.Sigmoid()
        )

        self.super_class_classifier = nn.Linear(1280, num_super_classes)
        self.sub_class_classifier = nn.Linear(1280, num_sub_classes)

    def forward(self, x):
        encoded = self.encoder(x)
        encoded = torch.flatten(encoded, 1)

        decoded = self.decoder(encoded)
        decoded = decoded.view(-1, 3, 32, 32)

        super_class_output = self.super_class_classifier(encoded)
        sub_class_output = self.sub_class_classifier(encoded)

        return super_class_output, sub_class_output, decoded


In [27]:
def test(trainer, save_to_csv=False, threshold_super=0.95, threshold_sub=0.95, return_predictions=False):
    trainer.model.eval()
    if not trainer.test_loader:
        raise NotImplementedError('test_loader not specified')

    superclass_predictions = {'image': [], 'superclass_index': [], 'superclass_probs': []}
    subclass_predictions = {'image': [], 'subclass_index': [], 'subclass_probs': []}

    with torch.no_grad():
        for i, data in enumerate(trainer.test_loader):
            inputs, img_name = data[0].to(trainer.device), data[1]

            super_outputs, sub_outputs = trainer.model(inputs)

            super_probs = F.softmax(super_outputs, dim=1)
            sub_probs = F.softmax(sub_outputs, dim=1)
            _, super_predicted = torch.max(super_probs, 1)
            _, sub_predicted = torch.max(sub_probs, 1)

            super_predicted[torch.max(super_probs, 1).values < threshold_super] = 3
            sub_predicted[torch.max(sub_probs, 1).values < threshold_sub] = 87

            superclass_predictions['superclass_index'].append(super_predicted.item())
            superclass_predictions['superclass_probs'].append(super_probs.cpu().numpy())
            superclass_predictions['image'].append(img_name[0])

            subclass_predictions['subclass_index'].append(sub_predicted.item())
            subclass_predictions['subclass_probs'].append(sub_probs.cpu().numpy())
            subclass_predictions['image'].append(img_name[0])

    if save_to_csv:
        superclass_df = pd.DataFrame(data=superclass_predictions)
        superclass_df.to_csv('superclass_prediction.csv', index=False)

        subclass_df = pd.DataFrame(data=subclass_predictions)
        subclass_df.to_csv('subclass_prediction.csv', index=False)

    if return_predictions:
        return superclass_predictions, subclass_predictions


In [140]:
# Training loop
def train_model(trainer, epoch):
    for epoch in range(epoch):
        print(f'Epoch {epoch+1}')
        trainer.train_epoch()
        trainer.validate_epoch()
        print('')
    print('Finished Training')

# Experiments

MobileNet_V2

In [137]:
# Init model and trainer
device = 'cuda'
model = MobileNetAutoencoder().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
trainer_mbv2 = AutoEncoderLossTrainer(model, criterion, optimizer, train_loader, val_loader, test_loader)



In [138]:
train_model(trainer_mbv2, epoch=20)

Epoch 1
Training loss: 2.269
Validation Loss: 2.077
Validation Superclass Accuracy: 77.58 %
Validation Subclass Accuracy: 39.59 %

Epoch 2
Training loss: 2.082
Validation Loss: 2.016
Validation Superclass Accuracy: 76.82 %
Validation Subclass Accuracy: 41.09 %

Epoch 3
Training loss: 2.061
Validation Loss: 1.951
Validation Superclass Accuracy: 78.76 %
Validation Subclass Accuracy: 43.35 %

Epoch 4
Training loss: 2.001
Validation Loss: 1.976
Validation Superclass Accuracy: 78.11 %
Validation Subclass Accuracy: 42.06 %

Epoch 5
Training loss: 2.209
Validation Loss: 2.299
Validation Superclass Accuracy: 67.60 %
Validation Subclass Accuracy: 41.20 %

Epoch 6


KeyboardInterrupt: ignored

In [None]:
test(trainer_mbv2, save_to_csv=True, threshold_super=0.9, threshold_sub=0.95, return_predictions=False)

EffcientNetB7

In [149]:
# Init model and trainer
device = 'cuda'
base_model = EfficientNet.from_pretrained('efficientnet-b7')
model = CustomEfficientNet(base_model).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
trainer_efb7 = BCELossTrainer(model, criterion, optimizer, train_loader, val_loader, test_loader)

Loaded pretrained weights for efficientnet-b7


In [150]:
train_model(trainer_efb7, epoch=20)

Epoch 1
Training loss: 0.433
Validation Loss: 0.825
Validation Superclass Accuracy: 37.77 %
Validation Subclass Accuracy: 22.00 %

Epoch 2
Training loss: 0.252
Validation Loss: 0.251
Validation Superclass Accuracy: 85.94 %
Validation Subclass Accuracy: 33.80 %

Epoch 3
Training loss: 0.208
Validation Loss: 0.214
Validation Superclass Accuracy: 86.59 %
Validation Subclass Accuracy: 32.94 %

Epoch 4
Training loss: 0.178
Validation Loss: 0.217
Validation Superclass Accuracy: 87.77 %
Validation Subclass Accuracy: 36.27 %

Epoch 5
Training loss: 0.175
Validation Loss: 0.203
Validation Superclass Accuracy: 88.73 %
Validation Subclass Accuracy: 33.37 %

Finished Training


In [142]:
train_model(trainer_efb7, epoch=20)

Epoch 1
Training loss: 0.464
Validation Loss: 0.528
Validation Superclass Accuracy: 67.38 %
Validation Subclass Accuracy: 29.72 %

Epoch 2
Training loss: 0.307
Validation Loss: 0.289
Validation Superclass Accuracy: 82.62 %
Validation Subclass Accuracy: 33.80 %

Epoch 3
Training loss: 0.276
Validation Loss: 0.262
Validation Superclass Accuracy: 84.44 %
Validation Subclass Accuracy: 34.12 %

Epoch 4
Training loss: 0.256
Validation Loss: 0.241
Validation Superclass Accuracy: 83.91 %
Validation Subclass Accuracy: 36.80 %

Epoch 5
Training loss: 0.238
Validation Loss: 0.216
Validation Superclass Accuracy: 86.05 %
Validation Subclass Accuracy: 37.88 %

Finished Training


EfficientNetB5

In [163]:
# Init model and trainer
device = 'cuda'
base_model = EfficientNet.from_pretrained('efficientnet-b5')
model = CustomEfficientNet(base_model).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
trainer_efb5 = BCELossTrainer(model, criterion, optimizer, train_loader, val_loader, test_loader)

Loaded pretrained weights for efficientnet-b5


In [164]:
train_model(trainer_efb5, epoch=35)

Epoch 1
Training loss: 0.451
Validation Loss: 0.410
Validation Superclass Accuracy: 66.31 %
Validation Subclass Accuracy: 32.40 %

Epoch 2
Training loss: 0.301
Validation Loss: 0.278
Validation Superclass Accuracy: 81.22 %
Validation Subclass Accuracy: 35.09 %

Epoch 3
Training loss: 0.318
Validation Loss: 0.311
Validation Superclass Accuracy: 79.29 %
Validation Subclass Accuracy: 36.48 %

Epoch 4
Training loss: 0.289
Validation Loss: 0.272
Validation Superclass Accuracy: 80.69 %
Validation Subclass Accuracy: 36.27 %

Epoch 5
Training loss: 0.279
Validation Loss: 0.286
Validation Superclass Accuracy: 81.33 %
Validation Subclass Accuracy: 34.76 %

Epoch 6
Training loss: 0.383
Validation Loss: 0.421
Validation Superclass Accuracy: 67.27 %
Validation Subclass Accuracy: 34.23 %

Epoch 7
Training loss: 0.470
Validation Loss: 0.525
Validation Superclass Accuracy: 53.43 %
Validation Subclass Accuracy: 30.90 %

Epoch 8
Training loss: 0.446
Validation Loss: 0.426
Validation Superclass Accuracy:

KeyboardInterrupt: ignored

In [130]:
def transform_df(df, threshold):
    df = df.copy()
    df['probs'] = df['subclass_probs']
    df['probs'] = df['probs'].str.strip('[]').str.split()
    df['probs'] = df['probs'].apply(lambda x: [float(i) for i in x[:-1]])
    df['Max_Prob'] = df['probs'].apply(max)
    df['Target'] = df['probs'].apply(lambda x: x.index(max(x)))
    df['Target'] = df['Target'].where(df['Max_Prob'] > threshold, 87)
    # print distribution
    print(df['Target'].value_counts())
    return df

In [67]:
def output_df(df, output_name):
    output = pd.DataFrame({'ID': df['image'], 'Target': df['Target']})
    output.to_csv(output_name, index=False)
    return output

EffcientNetB7 BCE more data

In [131]:
filename='subclass_prediction.csv'
df = pd.read_csv(filename)
sub_df_transformed = transform_df(df, 0.3)
output_df(sub_df_transformed, 'sub_test_effcientNet_moreData.csv')

87    11816
37      262
50      136
71       66
41       43
18       22
24       17
28       12
35        2
75        1
Name: Target, dtype: int64


Unnamed: 0,ID,Target
0,0.jpg,87
1,1.jpg,87
2,2.jpg,87
3,3.jpg,87
4,4.jpg,37
...,...,...
12372,12372.jpg,87
12373,12373.jpg,87
12374,12374.jpg,87
12375,12375.jpg,87


In [97]:
sub_df_transformed = transform_df(df, 0.8)
output_df(sub_df_transformed, 'sub_test_effcientNet_moreData.csv')

4     2728
6     1420
24    1073
2      916
57     746
37     695
21     669
50     522
75     476
71     473
30     435
65     347
62     323
72     273
52     262
68     228
41     167
26     118
36     113
51      78
28      74
43      45
7       34
18      30
77      26
70      26
12      15
25      14
9       12
64      12
35       8
84       5
13       5
63       2
78       2
34       1
38       1
49       1
17       1
15       1
Name: Target, dtype: int64


Unnamed: 0,ID,Target
0,0.jpg,21
1,1.jpg,24
2,2.jpg,71
3,3.jpg,4
4,4.jpg,37
...,...,...
12372,12372.jpg,4
12373,12373.jpg,71
12374,12374.jpg,57
12375,12375.jpg,52
