# Challenge 1

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
def is_running_on_kaggle():
    """Return True when the notebook is executing inside a Kaggle kernel.

    The check relies on the ``KAGGLE_KERNEL_RUN_TYPE`` environment variable.
    Kaggle is understood to set it to ``"Interactive"`` in the editor and to
    ``"Batch"`` for "Save & Run All" commits, so any non-empty value counts as
    "on Kaggle". Accepting only ``"Interactive"`` made committed runs fall back
    to the local paths and the short local epoch budget.

    Returns:
        bool: True if the variable is set to a non-empty value, else False.
    """
    return bool(os.environ.get("KAGGLE_KERNEL_RUN_TYPE"))


# Root directory of the competition data for the detected environment.
DATA_PATH = '/kaggle/input/aerial-cactus/' if is_running_on_kaggle() else 'data/'
print('Running on Kaggle' if is_running_on_kaggle() else 'Running locally')
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Running locally


Import required libraries

In [2]:
import torch
import torchvision
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
import torch.nn as nn
from tqdm import tqdm

In [3]:
# Check if CUDA is available
# Use the GPU whenever PyTorch reports one, otherwise run on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device} device')

Using cuda device


Define some useful constants.

In [22]:
# Input locations, both relative to the environment-specific DATA_PATH.
ANNOTATIONS_FILE = f'{DATA_PATH}train.csv'
IMG_DIR = f'{DATA_PATH}train/train/'

# Output locations and epoch budget depend on where the notebook runs.
if is_running_on_kaggle():
    CHECKPOINT_DIR = '/kaggle/working/checkpoints'
    FIGURES_DIR = '/kaggle/working/figures'
    EPOCHS = 40
else:
    CHECKPOINT_DIR = 'checkpoints/'
    FIGURES_DIR = 'figures/'
    EPOCHS = 5

# Hyper-parameters shared by the cells below.
SEED = 42
BATCH_SIZE = 64
LEARNING_RATE = 1e-3
TRAIN_SPLIT = 0.8  # fraction of the labelled data used for training

Set the manual seed.

In [5]:
# Seed from the shared SEED constant so the value is configured in one place
# (the literal 42 used here previously could silently drift from SEED).
torch.manual_seed(SEED)

<torch._C.Generator at 0x725ca3047090>

Extend the **Dataset** class so that a **DataLoader** can consume it (the class defines the mapping from an index to an image and its label).

In [6]:
from torchvision.io import read_image

class CactusDataset(Dataset):
    """Map-style dataset that pairs image files with labels listed in a CSV.

    The CSV is read with ``pd.read_csv``; for every row the first column is
    used as the image file name (relative to ``img_dir``) and the second
    column as the label.

    Args:
        annotations_file: path of the CSV file with file names and labels.
        img_dir: directory that contains the image files.
        transform: optional callable applied to the loaded image.
        target_transform: optional callable applied to the label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Number of rows in the annotations table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load the ``idx``-th image and return it with its label."""
        file_name = self.img_labels.iloc[idx, 0]
        image = read_image(os.path.join(self.img_dir, file_name))
        label = self.img_labels.iloc[idx, 1]

        # Transforms are optional; a falsy transform leaves the value untouched.
        image = self.transform(image) if self.transform else image
        label = self.target_transform(label) if self.target_transform else label
        return image, label

TODO: Try to preprocess like in ImProc

Instantiate a **Dataset** object on the training (+validation) data.

In [7]:
# Imagenet mean and std
# Three values each, i.e. one per channel of the 3-channel input images.
# Both lists are passed to transforms.Normalize in the transform pipeline below.
# NOTE(review): presumably in R, G, B order, matching read_image's output — confirm.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

In [8]:
import torchvision.transforms as transforms

# Transformation for the image data
# Pipeline applied to every image: resize, crop to 224, switch to float32,
# then normalise with the statistics defined above.
transform = transforms.Compose(
    [
        transforms.Resize(size=256, interpolation=transforms.InterpolationMode.BILINEAR),
        transforms.CenterCrop(size=224),
        transforms.ConvertImageDtype(dtype=torch.float32),
        transforms.Normalize(mean=mean, std=std),
    ]
)

In [9]:
# Create the dataset object
trainval_data = CactusDataset(
    annotations_file=ANNOTATIONS_FILE,
    img_dir=IMG_DIR,
    transform=transform,
)

In [10]:
# Print the shape of the first image in the dataset
# Index 0 of a sample is the (transformed) image; index 1 would be its label.
print(trainval_data[0][0].size())

torch.Size([3, 224, 224])


Split the dataset into train + validation

In [11]:
from torch.utils.data import random_split

# Define the sizes of training and validation sets
train_size = int(TRAIN_SPLIT * len(trainval_data))
val_size = len(trainval_data) - train_size

# Split the dataset into training and validation sets.
# A dedicated generator seeded with SEED makes the partition independent of
# how much of the global RNG stream was consumed before this cell ran, so
# re-running the cell (or the whole notebook) always yields the same split and
# validation samples cannot leak into training between runs.
split_generator = torch.Generator().manual_seed(SEED)
train_data, val_data = random_split(
    trainval_data, [train_size, val_size], generator=split_generator
)
assert len(train_data) + len(val_data) == len(trainval_data)

# Print the sizes of the training and validation sets
print("Training set size:", len(train_data))
print("Validation set size:", len(val_data))

Training set size: 14000
Validation set size: 3500


Let's define our first model.\
We are going to use the pretrained VGG16 model and add a small head on top of it: one fully connected hidden layer (with ReLU and dropout) followed by 1 linear FC output layer. The output will be a real value that we will feed into a Sigmoid function to squash it into the $[0, 1]$ interval, and we will do the classification by comparing the output of the Sigmoid with the $0.5$ threshold.\
Since we don't want to adjust the VGG16 weights, we are going to set the **requires_grad** property to **False** for each of its parameters.

In [12]:
from torchvision.models import VGG16_Weights, vgg16

class VGGClassifier(nn.Module):
    """Frozen pretrained VGG16 followed by a small trainable binary head.

    The backbone's output is fed through ``fc1 -> ReLU -> Dropout(0.3) -> fc2``
    and the module returns the raw single-unit output of ``fc2`` (no sigmoid
    is applied here).

    Args:
        fc_size: width of the hidden layer ``fc1``. It has to be provided; the
            ``None`` default is only kept for signature compatibility.

    Raises:
        ValueError: if ``fc_size`` is None.
    """

    def __init__(self, fc_size=None):
        super().__init__()

        # Fail early with a clear message, before any weights are loaded.
        if fc_size is None:
            raise ValueError('fc_size must be an integer hidden-layer width')

        # The weights enum is spelled VGG16_Weights in torchvision.
        vgg = vgg16(weights=VGG16_Weights.IMAGENET1K_V1)
        # Freeze the backbone: only the head defined below is trained.
        for param in vgg.parameters():
            param.requires_grad = False

        self.vgg = vgg
        # VGG exposes its head as ``classifier`` (there is no ``fc`` attribute);
        # its last layer determines the size of the backbone output.
        backbone_out_features = vgg.classifier[-1].out_features
        self.fc1 = nn.Linear(backbone_out_features, fc_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(fc_size, 1)

    def forward(self, x):
        """Return one raw output value per input sample."""
        x = self.vgg(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

At this point, let's train our model.

In [13]:
# Utility function for saving epochs checkpoints
def save_checkpoint(model, optimizer, loss, dir, desc):
    """Write the model and optimizer state to a checkpoint file.

    The file is stored as ``<CHECKPOINT_DIR>/<dir>/checkpoint_<desc>.pt``.

    Args:
        model: object whose ``state_dict()`` is saved.
        optimizer: object whose ``state_dict()`` is saved.
        loss: loss value stored next to the states.
        dir: sub-directory of ``CHECKPOINT_DIR`` to write into.
        desc: description stored in the checkpoint and used in the file name.
    """
    checkpoint_dir = os.path.join(CHECKPOINT_DIR, dir)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(checkpoint_dir, exist_ok=True)

    checkpoint_path = os.path.join(checkpoint_dir, f'checkpoint_{desc}.pt')
    torch.save({
        'desc': desc,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
    }, checkpoint_path)

# Utility function for loading epochs checkpoints
def load_checkpoint(model, optimizer, dir, desc):
    """Restore model and optimizer state from a checkpoint file.

    Reads ``<CHECKPOINT_DIR>/<dir>/checkpoint_<desc>.pt`` and loads the stored
    state dicts into the objects passed in.

    Args:
        model: object that receives ``model_state_dict``.
        optimizer: object that receives ``optimizer_state_dict``.
        dir: sub-directory of ``CHECKPOINT_DIR`` that holds the checkpoint.
        desc: description used in the checkpoint file name.

    Returns:
        tuple: ``(model, optimizer, loss)`` where ``loss`` is the stored value.
    """
    checkpoint_path = os.path.join(CHECKPOINT_DIR, dir, f'checkpoint_{desc}.pt')
    # Remap stored tensors onto the notebook's current device so a checkpoint
    # written on a GPU machine can still be loaded on a CPU-only one.
    checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    loss = checkpoint['loss']
    return model, optimizer, loss

In [14]:
# Training loop (1 epoch)
def train_model(train_dataloader, model, criterion, optimizer, checkpoint=False, desc='Training', dir_checkpoint=None):
    """Train ``model`` for one pass over ``train_dataloader``.

    Args:
        train_dataloader: iterable yielding ``(X, y)`` batches.
        model: module called on ``X.float()``; switched to train mode here.
        criterion: loss callable invoked as ``criterion(pred, y)`` with ``y``
            reshaped to ``(-1, 1)`` and cast to float.
        optimizer: optimizer stepped once per batch.
        checkpoint: when true, save a checkpoint after the epoch.
        desc: progress-bar label, also used as the checkpoint description.
        dir_checkpoint: sub-directory for the checkpoint; ``None`` stores it
            directly in the checkpoint root.

    Returns:
        float: loss averaged over the samples processed in this epoch.
    """
    running_loss = 0.0
    samples_seen = 0
    train_bar = tqdm(train_dataloader, desc=desc, leave=False)

    model.train()
    for X, y in train_bar:
        X = X.to(device)
        y = y.to(device)

        # Compute prediction and loss
        pred = model(X.float())
        y = y.reshape(-1, 1).float()
        loss = criterion(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size so that a smaller final
        # batch does not skew the epoch average.
        batch_loss = loss.item()
        batch_size = len(y)
        running_loss += batch_loss * batch_size
        samples_seen += batch_size

        # Update the loading bar
        train_bar.set_postfix({'loss': batch_loss})

    # Normalise by the samples actually processed; this equals the dataset
    # size for a plain DataLoader and stays correct when batches are dropped.
    avg_train_loss = running_loss / samples_seen
    train_bar.set_postfix({'loss': avg_train_loss})
    train_bar.close()

    if checkpoint:
        # os.path.join cannot take None, and failing here would throw away a
        # finished epoch, so fall back to the checkpoint root directory.
        target_dir = dir_checkpoint if dir_checkpoint is not None else ''
        save_checkpoint(model, optimizer, avg_train_loss, target_dir, desc)

    return avg_train_loss

In [15]:
# Compute validation loss and error rate
def evaluate(val_dataloader, model, criterion, desc='Validation'):
    """Evaluate ``model`` on ``val_dataloader`` without computing gradients.

    A sample is predicted positive when the sigmoid of the model output
    exceeds 0.5.

    Args:
        val_dataloader: iterable yielding ``(X, y)`` batches; its ``dataset``
            length is used to average the loss.
        model: module called on ``X.float()``; switched to eval mode here.
        criterion: loss callable invoked as ``criterion(pred, y)`` with ``y``
            reshaped to ``(-1, 1)`` and cast to float.
        desc: progress-bar label.

    Returns:
        tuple: ``(avg_val_loss, errors, confusion)`` where ``errors`` is a list
        with the per-row results of ``prediction != target`` and ``confusion``
        is ``[[tp, fp], [fn, tn]]``.
    """
    progress = tqdm(val_dataloader, desc=desc, leave=False)
    loss_sum = 0
    errors = []
    tp = tn = fp = fn = 0

    model.eval()
    with torch.no_grad():
        for inputs, targets in progress:
            inputs, targets = inputs.to(device), targets.to(device)

            # Forward pass and batch loss
            logits = model(inputs.float())
            targets = targets.reshape(-1, 1).float()
            batch_loss = criterion(logits, targets).item()

            # Hard decisions at the 0.5 probability threshold
            predicted = torch.sigmoid(logits) > 0.5

            # Confusion-matrix counts for this batch
            is_positive = targets == 1
            is_negative = targets == 0
            tp += (is_positive & predicted).sum().item()
            tn += (is_negative & ~predicted).sum().item()
            fp += (is_negative & predicted).sum().item()
            fn += (is_positive & ~predicted).sum().item()

            # One entry per row of the comparison result
            errors.extend(predicted != targets)

            # Accumulate the size-weighted loss and refresh the progress bar
            loss_sum += batch_loss * len(targets)
            progress.set_postfix({'loss': batch_loss})

    avg_val_loss = loss_sum / len(val_dataloader.dataset)
    progress.set_postfix({'loss': avg_val_loss})
    progress.close()
    return avg_val_loss, errors, [[tp, fp], [fn, tn]]

In [16]:
# Early stopping
class EarlyStopper:
    """Signal when the validation loss has stopped improving.

    Args:
        patience: number of "bad" checks after which stopping is requested.
        min_delta: margin above the best loss that a value must exceed to
            count as a bad check.
    """

    def __init__(self, patience=1, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.min_validation_loss = float('inf')

    def early_stop(self, validation_loss):
        """Record ``validation_loss`` and return True when training should stop.

        A new best loss resets the counter. A loss above
        ``best + min_delta`` increments it. Anything in between leaves the
        state unchanged.
        """
        best_so_far = self.min_validation_loss

        # Improvement: remember it and start counting from scratch.
        if validation_loss < best_so_far:
            self.min_validation_loss = validation_loss
            self.counter = 0
            return False

        # Within the tolerated margin: neither progress nor a strike.
        if not (validation_loss > (best_so_far + self.min_delta)):
            return False

        self.counter += 1
        return self.counter >= self.patience


In [17]:
def f1_score(confusion_matrix):
    """Compute the F1 score from a 2x2 confusion matrix.

    Args:
        confusion_matrix: nested sequence laid out as ``[[tp, fp], [fn, tn]]``.

    Returns:
        float: ``2*tp / (2*tp + fp + fn)``, which is the harmonic mean of
        precision and recall. Returns ``0.0`` when the score is undefined
        (no true positives, false positives or false negatives), e.g. when the
        model predicts only the negative class, instead of raising
        ``ZeroDivisionError``.
    """
    tp, fp = confusion_matrix[0]
    fn, _ = confusion_matrix[1]

    # Algebraically equal to 2 * P * R / (P + R), but with a single division,
    # so there is only one denominator to guard.
    denominator = 2 * tp + fp + fn
    if denominator == 0:
        return 0.0
    return 2 * tp / denominator

In [19]:
# Hidden-layer widths to compare; one model is trained per entry.
fc_sizes = [1, 6, 12, 24]

# One summary dict per fc_size, describing its best (lowest val loss) epoch.
best_models = []

print(f'Running Training for {EPOCHS} epochs')

# The dataloaders do not depend on the model, so they are built once and
# reused for every fc_size.
train_dataloader = torch.utils.data.DataLoader(train_data, BATCH_SIZE, shuffle=True)
val_dataloader = torch.utils.data.DataLoader(val_data, BATCH_SIZE, shuffle=False)

for fc_size in fc_sizes:
    print(f'>>>>>>>>     FC Size: {fc_size}     <<<<<<<<')
    # Create the model
    model = VGGClassifier(fc_size=fc_size).to(device)

    # Define the loss function and optimizer
    criterion = nn.BCEWithLogitsLoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

    train_losses = []
    val_losses = []
    err_rates = []
    f1_scores = []
    early_stopper = EarlyStopper(patience=10, min_delta=5e-4)
    best_epoch = None

    for epoch in range(EPOCHS):
        train_loss = train_model(train_dataloader, model, criterion, optimizer, checkpoint=False, desc=f'TrainingEpoch{(epoch + 1):02d}')
        val_loss, errors, confusion_matrix = evaluate(val_dataloader, model, criterion, desc=f'ValidationEpoch{(epoch + 1):02d}')
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        error_rate = (sum(errors) / len(val_data)).item()
        err_rates.append(error_rate)
        f1 = f1_score(confusion_matrix)
        f1_scores.append(f1)

        # evaluate() returns the matrix as [[tp, fp], [fn, tn]].
        (tp, fp), (fn, tn) = confusion_matrix
        # FPR = FP / (FP + TN) and FNR = FN / (FN + TP). The previous row-wise
        # ratios were FP / (TP + FP) and FN / (FN + TN), which are different
        # quantities. Guard against a validation set missing one class.
        fpr = fp / (fp + tn) if (fp + tn) else 0.0
        fnr = fn / (fn + tp) if (fn + tp) else 0.0
        print(f'Epoch {epoch + 1}/{EPOCHS}\t Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f} - Error Rate: {error_rate:.4f} - F1 Score: {f1:.4f} - (FPR: {fpr:.4f} - FNR: {fnr:.4f})')

        # Checkpoint every epoch that sets a new best validation loss.
        # NOTE(review): artefacts are stored under 'resnet' although the model
        # is VGGClassifier — confirm whether the directory should be renamed.
        if best_epoch is None or val_loss < val_losses[best_epoch]:
            best_epoch = epoch
            save_checkpoint(model, optimizer, val_loss, f'resnet/fc_{fc_size}', f'epoch{epoch+1}')

        if early_stopper.early_stop(val_loss):
            print(f'Early stopping on epoch {epoch + 1}')
            break

    # Left panel: losses. Right panel: error rate with F1 on a twin y-axis.
    fig, ax = plt.subplots(1, 2, figsize=(15, 5))
    fig.suptitle(f'FC Size: {fc_size}')

    ax[0].set_title('Loss')
    ax[1].set_title('Error Rate (validation)')

    epochs = [i + 1 for i in range(len(train_losses))]
    ax[0].plot(epochs, train_losses, label='Train Loss', color='tab:blue')
    ax[0].plot(epochs, val_losses, label='Val Loss', color='tab:orange')
    ax[1].plot(epochs, err_rates, label='Error Rate', color='tab:blue')
    ax2 = ax[1].twinx()
    ax2.plot(epochs, f1_scores, color='tab:orange', label='F1 Score')

    tick_positions = list(range(0, len(train_losses) + 1, 5))
    ax[0].set_xlabel('Epoch')
    ax[0].set_xticks(tick_positions)
    ax[0].set_ylabel('Loss')
    ax[0].legend()
    ax[1].set_xlabel('Epoch')
    ax[1].set_xticks(tick_positions)
    ax[1].set_ylabel('Error Rate')
    ax2.set_ylabel('F1 Score')
    ax2.tick_params(axis='y', labelcolor='tab:orange')
    # A single combined legend for both y-axes of the right panel; a separate
    # ax[1].legend() would draw a second, partial legend underneath it.
    lines, labels = ax[1].get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax2.legend(lines + lines2, labels + labels2, loc='best')

    # Save before showing (some backends discard the figure once it has been
    # shown), then close it so figures do not accumulate across iterations.
    fig_dir = os.path.join(FIGURES_DIR, 'resnet')
    os.makedirs(fig_dir, exist_ok=True)
    fig_path = os.path.join(fig_dir, f'fc_{fc_size}.png')
    fig.savefig(fig_path)
    plt.show()
    plt.close(fig)

    best_models.append({
        'fc_size': fc_size,
        'best_epoch': best_epoch,
        'train_loss': train_losses[best_epoch],
        'val_loss': val_losses[best_epoch],
        'error_rate': err_rates[best_epoch],
        'f1_score': f1_scores[best_epoch],
        'fig_path': fig_path,
    })

Running Training for 5 epochs
>>>>>>>>     FC Size: 1     <<<<<<<<


                                                                            

KeyboardInterrupt: 

In [None]:
# model = ResnetClassifier(fc_size=1).to(device)
# criterion = nn.BCEWithLogitsLoss().to(device)
# optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
# model = load_checkpoint(model, optimizer, 'fc_size_1', 'TrainingEpoch05')[0]

In [None]:
# # Define the path to the test annotations file and test image directory
# TEST_ANNO_FILE = DATA_PATH + 'sample_submission.csv'
# TEST_IMG_DIR = DATA_PATH + 'test/test/'

# # Create the test dataset object
# test_data = CactusDataset(TEST_ANNO_FILE, TEST_IMG_DIR, transform=transform)

# # Print the shape of the first image in the test dataset
# print(test_data[0][0].shape)

# # Create the test dataloader
# test_dataloader = torch.utils.data.DataLoader(test_data, BATCH_SIZE, shuffle=False)

# # Compute the predictions
# predictions = []
# model.eval()
# with torch.no_grad():
#     for X, _ in tqdm(test_dataloader, desc='Testing', leave=False):
#         X = X.to(device)
#         pred = model(X.float())
#         pred = nn.Sigmoid()(pred) > 0.5
#         predictions += pred.cpu().numpy().flatten().tolist()

torch.Size([3, 224, 224])


Testing:   0%|          | 0/63 [00:00<?, ?it/s]

                                                        

In [None]:
# predictions = torch.tensor(predictions).int()

In [None]:
# print(predictions.shape)
# print(predictions.sum())
# tmp = torch.tensor(1) - predictions
# print(tmp.sum())

torch.Size([4000])
tensor(2999)
tensor(1001)


In [None]:
# from torchsummary import summary

# summary(model, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 16, 16]           9,408
       BatchNorm2d-2           [-1, 64, 16, 16]             128
              ReLU-3           [-1, 64, 16, 16]               0
         MaxPool2d-4             [-1, 64, 8, 8]               0
            Conv2d-5             [-1, 64, 8, 8]          36,864
       BatchNorm2d-6             [-1, 64, 8, 8]             128
              ReLU-7             [-1, 64, 8, 8]               0
            Conv2d-8             [-1, 64, 8, 8]          36,864
       BatchNorm2d-9             [-1, 64, 8, 8]             128
             ReLU-10             [-1, 64, 8, 8]               0
       BasicBlock-11             [-1, 64, 8, 8]               0
           Conv2d-12             [-1, 64, 8, 8]          36,864
      BatchNorm2d-13             [-1, 64, 8, 8]             128
             ReLU-14             [-1, 6