In [1]:
import os
import random

import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader

from torchvision import datasets, transforms, models

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score

In [2]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU, the current CUDA device and all CUDA devices), then sets the cuDNN
    ``deterministic`` flag and clears the ``benchmark`` flag.

    Args:
        seed: Integer seed passed unchanged to each generator.
    """
    seeding_functions = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeding_functions:
        apply_seed(seed)

    # cuDNN flags: request deterministic kernels and turn benchmarking off.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [3]:
# Locations of the competition data.
train_dir = '/kaggle/input/sheep-classification-challenge-2025/Sheep Classification Images/train'
test_dir = '/kaggle/input/sheep-classification-challenge-2025/Sheep Classification Images/test'
train_labels_csv = '/kaggle/input/sheep-classification-challenge-2025/Sheep Classification Images/train_labels.csv'

train_df = pd.read_csv(train_labels_csv)

# Corrected label errors in the training set (bare file name -> correct label).
corrections = {
    '9ed08b0c.jpg': 'Naeimi',
    '8939e4f2.jpg': 'Naeimi',
    '94c8adc1.jpg': 'Sawakni',
    '21dfb4da.jpg': 'Harri',
    '9d221581.jpg': 'Sawakni',
    'c238564e.jpg': 'Naeimi',
    '67b098e7.jpg': 'Harri',
    '719c75d0.jpg': 'Naeimi',
    'c9204f2e.jpg': 'Naeimi',
    '2b934ba9.jpg': 'Naeimi',
    'f6c9933f.jpg': 'Harri',
    '25d3aa13.jpg': 'Sawakni'
}

# Overwrite the label of every row whose file name has a correction.
# This must run before the file names are turned into full paths below.
needs_fix = train_df['filename'].isin(corrections)
train_df.loc[needs_fix, 'label'] = train_df.loc[needs_fix, 'filename'].map(corrections)

# Replace bare file names with full paths inside the training directory.
train_df['filename'] = [os.path.join(train_dir, name) for name in train_df['filename']]

# Class names in sorted order define the integer encoding, both ways.
classes = sorted(train_df['label'].unique())
class_to_idx = dict(zip(classes, range(len(classes))))
idx_to_class = dict(enumerate(classes))
train_df['label_idx'] = train_df['label'].map(class_to_idx)

# Test images have no labels: keep the sorted bare names and a frame of full paths.
test_filenames = sorted(os.listdir(test_dir))
test_paths = [os.path.join(test_dir, name) for name in test_filenames]
test_df = pd.DataFrame({'filename': test_paths})

In [4]:
# Run-wide settings read by the model factory and the cross-validation loop.
config = dict(
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),  # GPU when available
    n_classes=len(classes),  # number of distinct labels in the training data
    n_folds=3,               # stratified CV folds
    n_epochs=3,              # epochs per fold
    batch_size=32,
    lr=5e-4,                 # initial learning rate
    seed=42,                 # base seed
)

# Custom Dataset Definition

In [5]:
class SheepDataset(Dataset):
    """Image dataset backed by a DataFrame of image paths.

    The frame must contain a ``filename`` column holding image paths. When it
    also contains a ``label_idx`` column the dataset is treated as labelled and
    each item is ``(image, label_idx)``; otherwise each item is
    ``(image, file basename)``.

    Columns are looked up by name, so the items do not depend on the column
    order of ``df``.
    """

    def __init__(self, df, transform=None):
        """Store a re-indexed copy of ``df`` and the optional transform.

        Args:
            df: DataFrame with a ``filename`` column and, for labelled data,
                a ``label_idx`` column.
            transform: Optional callable applied to the RGB PIL image.
        """
        self.df = df.reset_index(drop=True)
        self.transform = transform
        # Labelled mode is keyed on the column that __getitem__ actually returns.
        self.is_train = 'label_idx' in self.df.columns

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at row ``idx`` and return it with its label or file name.

        Returns:
            ``(image, label_idx)`` for labelled data, where ``label_idx`` is a
            Python ``int``; ``(image, basename)`` otherwise. ``image`` is the
            transformed image, or the RGB PIL image when no transform is set.
        """
        img_path = self.df['filename'].iloc[idx]

        # The context manager closes the underlying file once the pixels are converted.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        if self.is_train:
            return img, int(self.df['label_idx'].iloc[idx])
        return img, os.path.basename(img_path)


# Model Instantiation for Transfer Learning

In [6]:
def _set_trainable(module, trainable):
    """Set ``requires_grad`` to ``trainable`` on every parameter of ``module``."""
    for param in module.parameters():
        param.requires_grad = trainable


def create_model(model_name, n_classes=None):
    """Build a pretrained torchvision model with a new classification layer.

    All parameters are frozen first. Then a model-specific subset is unfrozen:
    ``features[7]`` and the classifier for ``'convnext_tiny'``; ``blocks[2]``,
    ``blocks[3]`` and the classifier for ``'maxvit_t'``. Finally the
    classifier's last ``nn.Linear`` is replaced by a freshly initialised one
    with ``n_classes`` outputs.

    Args:
        model_name: ``'convnext_tiny'`` or ``'maxvit_t'``.
        n_classes: Number of output classes. Defaults to
            ``config['n_classes']`` when omitted.

    Returns:
        The configured ``nn.Module`` (not yet moved to any device).

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    supported = ('convnext_tiny', 'maxvit_t')
    if model_name not in supported:
        # Fail here with a clear message instead of returning None to the caller.
        raise ValueError(
            f"Unsupported model_name {model_name!r}; expected one of {supported}"
        )

    if n_classes is None:
        n_classes = config['n_classes']

    if model_name == 'convnext_tiny':
        model = models.convnext_tiny(weights=models.ConvNeXt_Tiny_Weights.DEFAULT)
        _set_trainable(model, False)
        _set_trainable(model.features[7], True)
        _set_trainable(model.classifier, True)
        head_index = 2  # position of the Linear layer inside model.classifier
    else:  # 'maxvit_t'
        model = models.maxvit_t(weights=models.MaxVit_T_Weights.DEFAULT)
        _set_trainable(model, False)
        _set_trainable(model.classifier, True)
        _set_trainable(model.blocks[2], True)
        _set_trainable(model.blocks[3], True)
        head_index = 5  # position of the final Linear layer inside model.classifier

    # The replacement layer is new, so its parameters are trainable by default.
    in_features = model.classifier[head_index].in_features
    model.classifier[head_index] = nn.Linear(in_features, n_classes)

    return model

# Training and Evaluation with Stratified Cross-Validation

In [7]:
def train(model, train_loader, val_loader, criterion, optimizer, scheduler, n_epochs, device):
    """Train ``model`` for ``n_epochs`` epochs, validating after each one.

    Each epoch runs one optimisation pass over ``train_loader`` and one
    gradient-free pass over ``val_loader``, steps ``scheduler`` once, and
    prints a one-line summary with the sample-weighted mean train and
    validation loss and the macro F1 on the validation predictions.

    Args:
        model: Network to optimise in place.
        train_loader: Loader yielding ``(inputs, labels)`` training batches.
        val_loader: Loader yielding ``(inputs, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        n_epochs: Number of epochs to run.
        device: Device that every batch is moved to.

    Returns:
        None. Progress is reported through ``print`` only.
    """
    for epoch in range(1, n_epochs + 1):
        # ---- optimisation pass ----
        model.train()
        running_train = 0.0
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(batch_x), batch_y)
            batch_loss.backward()
            optimizer.step()

            # Weight by batch size so a smaller last batch is averaged correctly.
            running_train += batch_loss.item() * batch_x.size(0)
        train_loss = running_train / len(train_loader.dataset)

        # ---- validation pass ----
        model.eval()
        running_val = 0.0
        target_chunks, pred_chunks = [], []
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x = batch_x.to(device)
                batch_y = batch_y.to(device)

                batch_out = model(batch_x)
                batch_loss = criterion(batch_out, batch_y)
                running_val += batch_loss.item() * batch_x.size(0)

                target_chunks.append(batch_y.cpu())
                pred_chunks.append(batch_out.argmax(dim=1).cpu())
        val_loss = running_val / len(val_loader.dataset)

        f1 = f1_score(
            torch.cat(target_chunks).numpy(),
            torch.cat(pred_chunks).numpy(),
            average='macro',
        )

        scheduler.step()

        print(f"Epoch {epoch:02d} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val F1: {f1:.4f}")
        
    
def evaluate(model, dataloader, device):
    """Run ``model`` over ``dataloader`` without gradients and return all outputs.

    Only the first element of each batch (the inputs) is used. The second
    element is ignored and never moved to ``device``, so the loader may yield
    either label tensors or non-tensor values such as file names.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        dataloader: Iterable of ``(inputs, anything)`` batches.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        CPU tensor with the per-batch outputs concatenated along dim 0, in the
        order the batches were yielded.
    """
    model.eval()

    with torch.no_grad():
        batch_logits = [model(inputs.to(device)).cpu() for inputs, _ in dataloader]

    return torch.cat(batch_logits, dim=0)
    

def run_cv(
    model_name,
    train_df,
    test_loader,
    train_transform,
    val_transform,
    n_folds=None,
    batch_size=None,
    n_epochs=None,
    lr=None,
    device=None,
    seed=None,
):
    """Train ``model_name`` with stratified K-fold CV and collect its logits.

    For every fold a fresh model is created and trained on the training split.
    Its logits on the validation split are written into the out-of-fold
    matrix, and its logits on the test set are stored. After the last fold the
    test logits are averaged over folds and the macro F1 of the out-of-fold
    predictions is printed.

    Args:
        model_name: Name understood by ``create_model``.
        train_df: Labelled frame with a ``label_idx`` column; rows are split
            by position.
        test_loader: Loader over the test set whose batches are
            ``(images, file names)``.
        train_transform: Transform used for the training split.
        val_transform: Transform used for the validation split.
        n_folds, batch_size, n_epochs, lr, device, seed: Optional overrides.
            Any argument left as ``None`` is read from ``config`` when the
            function is called, so edits to ``config`` take effect without
            re-defining this function.

    Returns:
        Tuple ``(all_val_logits, test_mean_logits)``: the out-of-fold logits
        with one row per row of ``train_df``, and the fold-averaged test logits.
    """
    # Resolve defaults at call time; def-time defaults would freeze stale config values.
    n_folds = config['n_folds'] if n_folds is None else n_folds
    batch_size = config['batch_size'] if batch_size is None else batch_size
    n_epochs = config['n_epochs'] if n_epochs is None else n_epochs
    lr = config['lr'] if lr is None else lr
    device = config['device'] if device is None else device
    seed = config['seed'] if seed is None else seed

    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=seed)

    all_val_logits = torch.zeros((len(train_df), config['n_classes']))
    test_logits_per_fold = []

    for fold, (train_idx, val_idx) in enumerate(
        skf.split(train_df, train_df["label_idx"]), start=1
    ):
        print(f"\nFold {fold}/{n_folds}")

        df_tr = train_df.iloc[train_idx]
        df_va = train_df.iloc[val_idx]

        train_loader = DataLoader(
            SheepDataset(df_tr, train_transform),
            batch_size=batch_size,
            shuffle=True,
        )
        val_loader = DataLoader(
            SheepDataset(df_va, val_transform),
            batch_size=batch_size,
            shuffle=False,
        )

        # Fresh model, criterion, optimizer and scheduler for every fold.
        model = create_model(model_name).to(device)
        criterion = nn.CrossEntropyLoss()
        # Frozen parameters never receive gradients, so only the trainable ones are optimised.
        trainable_params = [p for p in model.parameters() if p.requires_grad]
        optimizer = optim.AdamW(trainable_params, lr=lr)
        scheduler = CosineAnnealingLR(optimizer, T_max=n_epochs, eta_min=1e-6)

        # Train and validate.
        train(
            model,
            train_loader,
            val_loader,
            criterion,
            optimizer,
            scheduler,
            n_epochs,
            device,
        )

        # Store validation logits for the current fold at their original row positions.
        all_val_logits[val_idx] = evaluate(model, val_loader, device)

        # Generate test logits for the current fold.
        # The second batch element is the file name and is not needed here.
        model.eval()
        fold_test_logits = []
        with torch.no_grad():
            for imgs, _ in test_loader:
                fold_test_logits.append(model(imgs.to(device)).cpu())
        test_logits_per_fold.append(torch.cat(fold_test_logits, dim=0))

        # Drop this fold's model before the next one is created so that two
        # models are never resident on the device at the same time.
        del model, optimizer, scheduler, trainable_params
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    test_mean_logits = torch.stack(test_logits_per_fold).mean(dim=0)

    # Compute overall CV F1 from the out-of-fold predictions.
    all_val_pred = torch.argmax(all_val_logits, dim=1).numpy()
    overall_f1 = f1_score(train_df['label_idx'].values, all_val_pred, average='macro')
    print(f"\nOverall CV F1: {overall_f1:.4f}")

    return all_val_logits, test_mean_logits

In [8]:
model_names = ['convnext_tiny', 'maxvit_t']
val_logits_dict, test_logits_dict = {}, {}

# The preprocessing pipelines and the test loader are the same for every
# model, so they are built once before the loop.
normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
    transforms.RandomErasing(p=0.1, scale=(0.1, 0.2), ratio=(0.2, 3)),
])

val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
])

test_dataset = SheepDataset(test_df, val_transform)
test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False)

for i, model_name in enumerate(model_names):
    print(f'\n{model_name}')

    # Each model gets its own seed, offset from the base seed by its position.
    seed = config['seed'] + i
    seed_everything(seed)

    val_logits, test_logits = run_cv(
        model_name,
        train_df,
        test_loader,
        train_transform,
        val_transform,
        seed=seed,
    )

    val_logits_dict[model_name] = val_logits
    test_logits_dict[model_name] = test_logits


convnext_tiny

Fold 1/3


Downloading: "https://download.pytorch.org/models/convnext_tiny-983f1562.pth" to /root/.cache/torch/hub/checkpoints/convnext_tiny-983f1562.pth
100%|██████████| 109M/109M [00:00<00:00, 210MB/s]


Epoch 01 | Train Loss: 0.8852 | Val Loss: 0.3192 | Val F1: 0.8519
Epoch 02 | Train Loss: 0.1180 | Val Loss: 0.2218 | Val F1: 0.8804
Epoch 03 | Train Loss: 0.0497 | Val Loss: 0.2033 | Val F1: 0.9133

Fold 2/3
Epoch 01 | Train Loss: 0.9294 | Val Loss: 0.2632 | Val F1: 0.9390
Epoch 02 | Train Loss: 0.1257 | Val Loss: 0.1777 | Val F1: 0.9348
Epoch 03 | Train Loss: 0.0541 | Val Loss: 0.1364 | Val F1: 0.9432

Fold 3/3
Epoch 01 | Train Loss: 0.9239 | Val Loss: 0.2400 | Val F1: 0.9002
Epoch 02 | Train Loss: 0.1460 | Val Loss: 0.1204 | Val F1: 0.9308
Epoch 03 | Train Loss: 0.0368 | Val Loss: 0.1159 | Val F1: 0.9295

Overall CV F1: 0.9294

maxvit_t

Fold 1/3


Downloading: "https://download.pytorch.org/models/maxvit_t-bc5ab103.pth" to /root/.cache/torch/hub/checkpoints/maxvit_t-bc5ab103.pth
100%|██████████| 119M/119M [00:00<00:00, 190MB/s]


Epoch 01 | Train Loss: 1.1138 | Val Loss: 0.3680 | Val F1: 0.8970
Epoch 02 | Train Loss: 0.2518 | Val Loss: 0.1630 | Val F1: 0.9526
Epoch 03 | Train Loss: 0.0867 | Val Loss: 0.1082 | Val F1: 0.9670

Fold 2/3
Epoch 01 | Train Loss: 0.9798 | Val Loss: 0.4513 | Val F1: 0.8345
Epoch 02 | Train Loss: 0.1639 | Val Loss: 0.3186 | Val F1: 0.8930
Epoch 03 | Train Loss: 0.0434 | Val Loss: 0.2117 | Val F1: 0.9332

Fold 3/3
Epoch 01 | Train Loss: 0.9707 | Val Loss: 0.2906 | Val F1: 0.9096
Epoch 02 | Train Loss: 0.1476 | Val Loss: 0.1471 | Val F1: 0.9415
Epoch 03 | Train Loss: 0.0373 | Val Loss: 0.1815 | Val F1: 0.9257

Overall CV F1: 0.9438


# Optimal Weighted Ensemble Search

In [9]:
# Grid-search the blend weight between the two models' out-of-fold logits.
# `w` is applied to the first model and `1 - w` to the second; the first
# weight that reaches the highest macro F1 is kept.
y_true = train_df['label_idx'].values
first_val_logits = val_logits_dict[model_names[0]]
second_val_logits = val_logits_dict[model_names[1]]

best_weight, best_score = 0.0, 0.0
for w in np.linspace(0, 1, 101):
    blended_pred = torch.argmax(w * first_val_logits + (1 - w) * second_val_logits, dim=1)
    score = f1_score(y_true, blended_pred, average="macro")

    # Only a strict improvement replaces the current best.
    if not score > best_score:
        continue

    best_score, best_weight = score, w
    print(f'F1 Score: {best_score:.5f} | Weight: {best_weight}')

print(f'Best Score: {best_score:.5f} | Best Weight: {best_weight}')

F1 Score: 0.94380 | Weight: 0.0
F1 Score: 0.94541 | Weight: 0.03
F1 Score: 0.94637 | Weight: 0.06
F1 Score: 0.94834 | Weight: 0.09
F1 Score: 0.95102 | Weight: 0.2
F1 Score: 0.95245 | Weight: 0.31
F1 Score: 0.95512 | Weight: 0.32
F1 Score: 0.95787 | Weight: 0.33
F1 Score: 0.95882 | Weight: 0.34
F1 Score: 0.96109 | Weight: 0.48
Best Score: 0.96109 | Best Weight: 0.48


# Create Submission from Best-Weighted Ensemble

In [10]:
# Blend the fold-averaged test logits with the weight chosen on the
# out-of-fold predictions, then take the highest-scoring class per image.
first_test_logits = test_logits_dict[model_names[0]]
second_test_logits = test_logits_dict[model_names[1]]
ensemble = best_weight * first_test_logits + (1 - best_weight) * second_test_logits
pred = ensemble.argmax(dim=1).numpy()

# Map class indices back to label names; rows follow the order of `test_filenames`.
pred_labels = [idx_to_class[class_idx] for class_idx in pred]
submission = pd.DataFrame(
    list(zip(test_filenames, pred_labels)),
    columns=['filename', 'label'],
)

submission.to_csv('submission.csv', index=False)
submission

Unnamed: 0,filename,label
0,0306fa89.jpg,Barbari
1,0345e3ca.jpg,Roman
2,0551a473.jpg,Sawakni
3,06e1783d.jpg,Goat
4,08b9981b.jpg,Barbari
...,...,...
139,f3e7b1fe.jpg,Roman
140,faadf33d.jpg,Roman
141,fbf2e74c.jpg,Sawakni
142,ff19c491.jpg,Sawakni


### 