### Imports

In [None]:
# Import necessary libraries for data handling, image processing and deep learning
import os
import pandas as pd
from PIL import Image
from glob import glob

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import Subset, DataLoader

import torchvision
import torchvision.transforms as transforms
from torchvision.models import efficientnet_v2_m

In [None]:
# Mount Google Drive at /content/drive so the dataset files stored there can be read directly.
# NOTE: relies on the `google.colab` package, so this cell is presumably only runnable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Paths

In [None]:
# Locations of the training data, the test data and the submission output folder.
# All three live under one challenge folder on the mounted Drive, so they are derived
# from a single root instead of repeating the prefix.
_CHALLENGE_ROOT = "/content/drive/MyDrive/ML Olympiad - TurtleVision Challenge"
TRAIN_PATH = f"{_CHALLENGE_ROOT}/Data/train"
TEST_PATH = f"{_CHALLENGE_ROOT}/Data/test"
SUBMISSION_PATH = f"{_CHALLENGE_ROOT}/Submissions"

### Data preprocessing & splitting

In [None]:
# Move to the folder containing the challenge data.
# The relative paths used later in this notebook ('train' as the ImageFolder root and
# 'test/*.jpg' for the test-image glob) are resolved against this working directory.
os.chdir("/content/drive/MyDrive/ML Olympiad - TurtleVision Challenge/Data/")

In [None]:
# Training transform: random augmentation followed by tensor conversion and normalization.
tr_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(), # Randomly flip images horizontally to increase dataset diversity
    transforms.RandomVerticalFlip(), # Randomly flip images vertically
    transforms.RandomRotation(degrees=(10, 60)), # Rotate by an angle drawn from the given (min, max) degree range
    transforms.ToTensor(), # Convert images to tensor format for PyTorch processing
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Channel statistics commonly used with ImageNet-pretrained models
])

# Validation transform: no random augmentation, only the deterministic preprocessing
# that is also applied to the test images. Augmenting the validation images would make
# the validation loss noisy from epoch to epoch, and that loss drives both the
# learning-rate scheduler and the "best model" checkpoint.
val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Two views over the same image folder that differ only in the transform they apply.
# ImageFolder builds its sample list deterministically from the directory contents,
# so an index refers to the same image in both views.
augmented_view = torchvision.datasets.ImageFolder(root='train', transform=tr_transform)
plain_view = torchvision.datasets.ImageFolder(root='train', transform=val_transform)

# Split the sample indices 80/20 into training & val sets to evaluate model performance.
# A seeded generator keeps the split identical between runs.
train_size = int(0.8 * len(augmented_view))
val_size = len(augmented_view) - train_size
split_generator = torch.Generator().manual_seed(42)
shuffled_indices = torch.randperm(len(augmented_view), generator=split_generator).tolist()
trainset = Subset(augmented_view, shuffled_indices[:train_size])
valset = Subset(plain_view, shuffled_indices[train_size:])

# Create data loaders for both training and val sets to iterate through the dataset in batches (process multiple images in one iteration)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2) # Shuffling helps to prevent the model from learning the order of the examples
valloader = DataLoader(valset, batch_size=32, shuffle=False, num_workers=2)

### Modeling

In [None]:
# Instantiate EfficientNet V2-M initialised with torchvision's default pretrained weights.
# Reference: https://pytorch.org/vision/main/models/efficientnetv2.html
# (the `weights=` argument replaces the deprecated `pretrained=True` flag)
pretrained_weights = torchvision.models.EfficientNet_V2_M_Weights.DEFAULT
model = efficientnet_v2_m(weights=pretrained_weights)

Downloading: "https://download.pytorch.org/models/efficientnet_v2_m-dc08266a.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_v2_m-dc08266a.pth
100%|██████████| 208M/208M [00:02<00:00, 78.4MB/s]


In [None]:
# Swap the final classifier layer for a fresh linear layer with 6 outputs
# (one per class in this challenge), keeping the incoming feature width unchanged.
num_ftrs = model.classifier[-1].in_features
model.classifier[-1] = nn.Linear(in_features=num_ftrs, out_features=6)

In [None]:
# Set the device to GPU if available for faster training, else use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Number of epochs used to fine-tune the pretrained model
num_epochs = 100

# Define the loss as cross-entropy for multi-class classification
criterion = nn.CrossEntropyLoss() # https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html

# Set up the optimizer to Adam with an initial learning rate
optimizer = optim.Adam(model.parameters(), lr=0.0001) # https://pytorch.org/docs/stable/generated/torch.optim.Adam.html#adam

# Define a lr scheduler that reduces the learning rate based on validation performance:
# after `patience` (20) scheduler steps without an improvement of the monitored value,
# the lr is multiplied by `factor` (0.7).
# The `verbose` argument is intentionally not passed: it is deprecated in recent PyTorch
# releases; read the current lr from `optimizer.param_groups` or
# `lr_schedulerplateau.get_last_lr()` instead.
lr_schedulerplateau = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.7, patience=20)

In [None]:
# Store loss & accuracy values through the training
train_losses, valid_losses = [], []
train_accs, valid_accs = [], []
valid_loss_min = float("inf")  # best (lowest) validation loss seen so far

# Move the model to the device (CPU or GPU)
model = model.to(device)

# Training loop: one pass over the training set followed by one pass over the validation set per epoch
for epoch in range(num_epochs):
    train_loss = 0.0
    valid_loss = 0.0
    # Correct-prediction counters are kept as tensors on the device so the per-batch
    # update does not force a host synchronisation, and so `.double()` below is always valid.
    train_running_corrects = torch.zeros((), dtype=torch.long, device=device)
    valid_running_corrects = torch.zeros((), dtype=torch.long, device=device)

    # Switch back to training mode every epoch: the validation phase below puts the
    # model in eval mode, which would otherwise persist into the following epochs.
    model.train()
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device) # Move data to the appropriate device (CPU or GPU)

        # Forward pass
        outputs = model(inputs) # Compute predicted outputs by passing inputs to the model
        loss = criterion(outputs, labels) # Calculate the batch loss

        # Backward and optimize
        optimizer.zero_grad() # Clear gradients from the previous step
        loss.backward() # Compute gradient of the loss with respect to model parameters
        optimizer.step() # Perform a single optimization step to update parameters

        # The criterion returns the batch mean, so weight it by the batch size
        # to obtain a per-sample average over the whole dataset later on.
        train_loss += loss.item() * inputs.size(0)

        # Track the accuracy
        _, preds = torch.max(outputs, 1)
        train_running_corrects += torch.sum(preds == labels)

    # Set the model to evaluation mode for validation
    model.eval()
    # Perform inference without tracking gradients to save memory and computations
    with torch.no_grad():
        for images, targets in valloader:
            images, targets = images.to(device), targets.to(device)

            outputs = model(images)
            loss = criterion(outputs, targets) # Loss between the model's predictions and the true targets

            # Accumulate the sample-weighted validation loss
            valid_loss += loss.item() * images.size(0)

            # Predicted class = index of the highest score
            _, preds = torch.max(outputs, 1)
            valid_running_corrects += torch.sum(preds == targets)

    # Store training & val losses and accuracies
    epoch_loss = train_loss / len(trainloader.dataset)
    epoch_acc = train_running_corrects.double() / len(trainloader.dataset)
    valid_loss = valid_loss / len(valloader.dataset)
    valid_acc = valid_running_corrects.double() / len(valloader.dataset)
    train_losses.append(epoch_loss)
    valid_losses.append(valid_loss)
    train_accs.append(epoch_acc)
    valid_accs.append(valid_acc)

    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc.item()*100:.2f}%, Val Loss: {valid_loss:.4f}, Val Acc: {valid_acc.item()*100:.2f}%")

    # Step the plateau scheduler exactly once per epoch on the mean validation loss.
    # Stepping it per batch on a partial running sum would make the monitored value
    # grow within each epoch and count `patience` in batches rather than epochs.
    lr_before = optimizer.param_groups[0]['lr']
    lr_schedulerplateau.step(valid_loss)
    lr_after = optimizer.param_groups[0]['lr']
    if lr_after != lr_before:
        print(f"Learning rate reduced ({lr_before:.2e} --> {lr_after:.2e}).")

    # Save the model if the validation loss has decreased
    if valid_loss <= valid_loss_min:
        print(f"Validation loss decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}).  Saving model ...")
        torch.save(model.state_dict(), SUBMISSION_PATH + '/model.pth')
        valid_loss_min = valid_loss

print("Training complete.")

Epoch [1/100], Train Loss: 0.0000, Train Acc: 100.00%, Val Loss: 0.0518, Val Acc: 98.89%
Validation loss decreased (inf --> 0.051800).  Saving model ...
Epoch [2/100], Train Loss: 0.0000, Train Acc: 100.00%, Val Loss: 0.0539, Val Acc: 98.89%
Epoch [3/100], Train Loss: 0.0000, Train Acc: 100.00%, Val Loss: 0.0232, Val Acc: 99.44%
Validation loss decreased (0.051800 --> 0.023169).  Saving model ...
Epoch [4/100], Train Loss: 0.0000, Train Acc: 100.00%, Val Loss: 0.0662, Val Acc: 98.89%
Epoch [5/100], Train Loss: 0.0001, Train Acc: 100.00%, Val Loss: 0.0154, Val Acc: 99.44%
Validation loss decreased (0.023169 --> 0.015414).  Saving model ...
Epoch [6/100], Train Loss: 0.0000, Train Acc: 100.00%, Val Loss: 0.0344, Val Acc: 98.89%
Epoch [7/100], Train Loss: 0.0000, Train Acc: 100.00%, Val Loss: 0.0567, Val Acc: 99.44%
Epoch [8/100], Train Loss: 0.0000, Train Acc: 100.00%, Val Loss: 0.0236, Val Acc: 98.89%
Epoch [9/100], Train Loss: 0.0000, Train Acc: 100.00%, Val Loss: 0.0041, Val Acc: 100.

### Prediction

In [None]:
# Define a custom dataset class to load the test data containing images of all classes
class CustomDataset(Dataset):
    """Unlabelled image dataset backed by a list of image file paths.

    Each item is the image at the corresponding path, converted to RGB and
    passed through ``transform`` when one is given. No label is returned.
    """

    def __init__(self, image_paths, transform=None):
        """Store the paths and the optional per-image transform.

        Args:
            image_paths: Indexable collection of image file paths.
            transform: Optional callable applied to each loaded RGB image.
        """
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, index):
        """Load the image at ``index`` as RGB and return it, transformed if requested."""
        image_path = self.image_paths[index]
        # `convert` returns a new, fully loaded image, so the underlying file can be
        # closed right away instead of staying open until garbage collection.
        with Image.open(image_path) as source_image:
            image = source_image.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image

In [None]:
# Preprocessing for test images: tensor conversion and normalization only (no random augmentation)
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Collect the .jpg files of the 'test' folder (relative to the current working directory)
# and wrap them in a dataset / loader that keeps them in the order returned by glob
test_images = glob('test/*.jpg')
test_dataset = CustomDataset(image_paths=test_images, transform=test_transform)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [None]:
# Run the model over the test set and collect one predicted class index per image.
# NOTE(review): this uses the weights currently held in `model`; no checkpoint is loaded
# in this cell, so confirm whether the saved 'model.pth' should be restored first.
predictions = []
model.eval()  # Evaluation mode for inference
with torch.no_grad():  # No gradients are needed when only predicting
    for batch in test_loader:
        logits = model(batch.to(device))
        # Index of the highest score along the class dimension is the predicted class
        predictions += logits.argmax(dim=1).tolist()

In [None]:
# Class names, hard-coded in label-index order.
# NOTE(review): presumably this matches the index order assigned to the class folders
# when the training data was loaded — verify against the training dataset.
class_names = [
    "barrel_jellyfish",
    "compass_jellyfish",
    "lions_mane_jellyfish",
    "mauve_stinger_jellyfish",
    "moon_jellyfish",
    "plastic_pollution",
]

# Translate every predicted class index into its class name
predicted_class_names = list(map(class_names.__getitem__, predictions))

In [None]:
# Pair filenames with predicted_class_names.
# The image id is the file name without its directory and without its extension;
# basename/splitext only strip the final extension and work with any path separator
# (a plain str.replace would also remove '.jpg' occurring in the middle of a name).
filenames = [os.path.splitext(os.path.basename(img))[0] for img in test_images]
pairs = list(zip(filenames, predicted_class_names))

# Sort pairs based on filenames elements
sorted_pairs = sorted(pairs, key=lambda x: x[0])

# Unpack sorted pairs into separate sequences.
# zip(*[]) yields nothing to unpack, so an empty test set is handled explicitly.
if sorted_pairs:
    sorted_filenames, sorted_predicted_class_names = zip(*sorted_pairs)
else:
    sorted_filenames, sorted_predicted_class_names = (), ()

### Submission

In [None]:
# Collect the submission columns: one image id and one predicted class name per row
results = dict(
    ImageID=sorted_filenames,
    PredictedClass=sorted_predicted_class_names,
)

In [None]:
# Turn the results into a DataFrame and write it, without the index column,
# as the submission CSV inside the submissions folder
results_df = pd.DataFrame(data=results)
results_df.to_csv(f"{SUBMISSION_PATH}/effv2m_100ep_lrsch.csv", index=False)

### Thank you for your attention!

I look forward to any questions you might have about this notebook. Please feel free to contact me on LinkedIn: https://www.linkedin.com/in/wassim-chakroun/