In [3]:

# IMPORTS


import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from pathlib import Path
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import v2
from torch.optim.lr_scheduler import StepLR


# DATA SETUP


DATA_DIR = Path("/kaggle/input/feathers-in-focus-model/aml-2025-feathers-in-focus")


def _resize_stages():
    """Return fresh transforms that turn a PIL image into a 224x224 image tensor."""
    return [v2.ToImage(), v2.Resize((224, 224))]


def _normalize_stages():
    """Return fresh transforms that scale to float32 and normalise per channel."""
    return [
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]


# Training pipeline: resize, random augmentation, then scaling/normalisation.
TRANSFORM_TRAIN = v2.Compose(
    _resize_stages()
    + [
        v2.RandomHorizontalFlip(p=0.5),
        v2.RandomRotation(degrees=10),
        v2.ColorJitter(brightness=0.2, contrast=0.2),
    ]
    + _normalize_stages()
)

# Validation pipeline: the same stages without any random augmentation.
TRANSFORM_VAL = v2.Compose(_resize_stages() + _normalize_stages())

# Load the attribute matrix and keep it as a float32 tensor.
attributes = torch.tensor(np.load(DATA_DIR / "attributes.npy"), dtype=torch.float32)
print(f"Attributes shape: {attributes.shape}")


# DATASET


class BirdDataset(Dataset):
    """Dataset that loads images listed in a DataFrame from a single folder.

    Args:
        df: DataFrame with an "image_path" column and, unless ``test_mode``
            is set, a "label" column. Its index is reset on construction.
        search_root: Directory (``Path``) that contains the image files.
        transform: Callable applied to the RGB ``PIL.Image`` before returning.
        attributes: Per-class attribute table, indexed by the zero-based label.
        test_mode: When True, ``__getitem__`` returns only the image.
    """

    def __init__(self, df, search_root, transform, attributes, test_mode=False):
        self.df = df.reset_index(drop=True)
        self.search_root = search_root
        self.transform = transform
        self.attributes = attributes
        self.test_mode = test_mode

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the transformed image, plus label and attributes when labelled.

        Returns:
            ``image`` in test mode, otherwise ``(image, label, attrs)`` where
            ``label`` is the CSV label minus one and ``attrs`` is the matching
            row of ``self.attributes``.
        """
        row = self.df.iloc[idx]
        # Only the basename of the CSV path is used; the directory part is
        # replaced by search_root. A basename never contains a separator, so
        # no further stripping is needed.
        filename = Path(str(row["image_path"])).name
        path = self.search_root / filename

        # Close the underlying file deterministically instead of relying on
        # garbage collection; convert() returns an independent in-memory copy.
        with Image.open(path) as img:
            image = img.convert("RGB")
        image = self.transform(image)

        if self.test_mode:
            return image

        # Labels are shifted down by one (presumably 1-based in the CSV) so
        # they can index the attribute table and serve as class indices.
        label = int(row["label"]) - 1
        attrs = self.attributes[label]
        return image, label, attrs


# DATA LOADERS


train_df = pd.read_csv(DATA_DIR / "train_images.csv")

# Fixed seed so the shuffled 80/20 train/validation split is reproducible.
torch.manual_seed(42)
indices = torch.randperm(len(train_df)).tolist()
train_size = int(0.8 * len(train_df))


def _subset(rows):
    """Return the selected rows of train_df with a fresh 0..n-1 index."""
    return train_df.iloc[rows].reset_index(drop=True)


# Both splits read from the same image folder and share the attribute table;
# only the rows and the transform pipeline differ.
_shared_kwargs = {"search_root": DATA_DIR / "train_images", "attributes": attributes}

train_dataset = BirdDataset(
    df=_subset(indices[:train_size]),
    transform=TRANSFORM_TRAIN,
    **_shared_kwargs,
)
val_dataset = BirdDataset(
    df=_subset(indices[train_size:]),
    transform=TRANSFORM_VAL,
    **_shared_kwargs,
)

# Only the training loader is shuffled; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=32)


# MODEL - improved with BatchNorm + an extra conv layer


class Net(nn.Module):
    """Five-stage CNN with a classification head and an attribute head.

    Each stage is Conv2d(3x3, stride 1, no padding) -> BatchNorm2d -> ReLU ->
    2x2 max-pool. For a 224x224 input the spatial size shrinks
    224 -> 111 -> 54 -> 26 -> 12 -> 5, so the flattened feature vector has
    128 * 5 * 5 = 3200 entries, which is what ``fc1`` expects.
    """

    # Channel widths: input followed by the output width of each conv stage.
    _CHANNELS = (3, 32, 64, 128, 128, 128)

    def __init__(self):
        super().__init__()

        # Register conv1/bn1 ... conv5/bn5 in order so that parameter names
        # (state_dict keys) and initialisation order stay fixed.
        for stage, (c_in, c_out) in enumerate(
            zip(self._CHANNELS, self._CHANNELS[1:]), start=1
        ):
            setattr(self, f"conv{stage}", nn.Conv2d(c_in, c_out, 3, 1))
            setattr(self, f"bn{stage}", nn.BatchNorm2d(c_out))

        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)

        # Fully connected layers: shared 256-d features feed both heads.
        self.fc1 = nn.Linear(3200, 256)
        self.fc2 = nn.Linear(256, 200)
        self.fc_attr = nn.Linear(256, 312)

    def forward(self, x):
        """Return ``(class_out, attr_out)`` for a batch of images.

        ``class_out`` holds log-probabilities over the 200 classes and
        ``attr_out`` holds sigmoid activations for the 312 attributes.
        """
        # Convolutional trunk: conv -> batch norm -> ReLU -> 2x2 max-pool.
        for stage in range(1, len(self._CHANNELS)):
            conv = getattr(self, f"conv{stage}")
            norm = getattr(self, f"bn{stage}")
            x = F.max_pool2d(F.relu(norm(conv(x))), 2)

        # Dropout on the feature map, then flatten everything but the batch.
        x = torch.flatten(self.dropout1(x), 1)
        features = self.dropout2(F.relu(self.fc1(x)))

        # Classification head (log-probabilities, to pair with nll_loss).
        class_out = F.log_softmax(self.fc2(features), dim=1)

        # Attribute head (values in [0, 1]).
        attr_out = torch.sigmoid(self.fc_attr(features))

        return class_out, attr_out


# TRAINING


# Train on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

model = Net().to(device)
param_count = sum(p.numel() for p in model.parameters())
print(f"Parameters: {param_count:,}")

optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
# Halve the learning rate when validation accuracy stops improving.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.5, patience=5
)


EPOCHS = 70
ATTR_WEIGHT = 0.5
best_acc = 0.0


def _train_one_epoch(net, loader, opt):
    """Run one optimisation pass; return (summed batch loss, correct count)."""
    net.train()
    loss_sum = 0
    hits = 0
    for batch_images, batch_labels, batch_attrs in loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)
        batch_attrs = batch_attrs.to(device)

        opt.zero_grad()
        log_probs, attr_probs = net(batch_images)

        # Joint objective: class NLL plus the weighted attribute BCE term.
        class_term = F.nll_loss(log_probs, batch_labels)
        attr_term = F.binary_cross_entropy(attr_probs, batch_attrs)
        batch_loss = class_term + ATTR_WEIGHT * attr_term

        batch_loss.backward()
        opt.step()

        loss_sum += batch_loss.item()
        hits += (log_probs.argmax(1) == batch_labels).sum().item()
    return loss_sum, hits


def _count_correct(net, loader):
    """Return how many samples in ``loader`` the network classifies correctly."""
    net.eval()
    hits = 0
    with torch.no_grad():
        for batch_images, batch_labels, _ in loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            log_probs, _ = net(batch_images)
            hits += (log_probs.argmax(1) == batch_labels).sum().item()
    return hits


for epoch in range(1, EPOCHS + 1):
    total_loss, correct = _train_one_epoch(model, train_loader, optimizer)
    train_acc = correct / len(train_loader.dataset)

    val_correct = _count_correct(model, val_loader)
    val_acc = val_correct / len(val_loader.dataset)

    print(f"Epoch {epoch}/{EPOCHS} | Loss: {total_loss:.1f} | Train: {train_acc:.4f} | Val: {val_acc:.4f}")

    # Checkpoint only when validation accuracy strictly improves.
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), "bird_cnn.pt")
        print("  ↳ Saved!")

    scheduler.step(val_acc)

print(f"\nBest: {best_acc:.4f}")



# SUBMISSION WITH TTA (test-time augmentation)


# Restore the best checkpoint. map_location makes the load work even if the
# tensors were saved from a different device than the one in use now.
model.load_state_dict(torch.load("bird_cnn.pt", map_location=device))
model.eval()

test_df = pd.read_csv(DATA_DIR / "test_images_path.csv")

# Test-time augmentation views: the plain image and its horizontal mirror
# (RandomHorizontalFlip with p=1.0 always flips).
TTA_TRANSFORMS = [
    v2.Compose([
        v2.ToImage(),
        v2.Resize((224, 224)),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    v2.Compose([
        v2.ToImage(),
        v2.Resize((224, 224)),
        v2.RandomHorizontalFlip(p=1.0),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
]

test_image_dir = DATA_DIR / "test_images"
predictions = []
with torch.no_grad():
    # Iterate the path column directly instead of building a row Series per
    # image with iloc.
    for image_path in test_df["image_path"]:
        # Only the basename is kept; images are looked up in test_image_dir.
        path = test_image_dir / Path(str(image_path)).name
        # Close the file handle deterministically; convert() returns a copy.
        with Image.open(path) as img:
            image = img.convert("RGB")

        # Stack all TTA views into one batch so the model runs once per image.
        views = torch.stack([transform(image) for transform in TTA_TRANSFORMS])
        log_probs, _ = model(views.to(device))

        # The model returns log-probabilities; average them over the views
        # and shift the winning index back to the labels' original offset.
        avg_output = log_probs.mean(0)
        predictions.append(avg_output.argmax().item() + 1)

submission = pd.DataFrame({"id": test_df["id"].values, "label": predictions})
submission.to_csv("submission.csv", index=False)
print(submission.head())
print("Done!")

Attributes shape: torch.Size([200, 312])
Device: cuda
Parameters: 1,340,416
Epoch 1/70 | Loss: 548.8 | Train: 0.0086 | Val: 0.0102
  ↳ Saved!
Epoch 2/70 | Loss: 524.5 | Train: 0.0111 | Val: 0.0127
  ↳ Saved!
Epoch 3/70 | Loss: 512.6 | Train: 0.0194 | Val: 0.0254
  ↳ Saved!
Epoch 4/70 | Loss: 502.9 | Train: 0.0229 | Val: 0.0242
Epoch 5/70 | Loss: 495.3 | Train: 0.0248 | Val: 0.0382
  ↳ Saved!
Epoch 6/70 | Loss: 486.0 | Train: 0.0299 | Val: 0.0280
Epoch 7/70 | Loss: 479.7 | Train: 0.0373 | Val: 0.0420
  ↳ Saved!
Epoch 8/70 | Loss: 471.8 | Train: 0.0455 | Val: 0.0420
Epoch 9/70 | Loss: 468.4 | Train: 0.0411 | Val: 0.0407
Epoch 10/70 | Loss: 462.7 | Train: 0.0420 | Val: 0.0662
  ↳ Saved!
Epoch 11/70 | Loss: 455.4 | Train: 0.0500 | Val: 0.0534
Epoch 12/70 | Loss: 457.0 | Train: 0.0478 | Val: 0.0471
Epoch 13/70 | Loss: 453.4 | Train: 0.0522 | Val: 0.0522
Epoch 14/70 | Loss: 450.3 | Train: 0.0516 | Val: 0.0662
Epoch 15/70 | Loss: 447.6 | Train: 0.0538 | Val: 0.0776
  ↳ Saved!
Epoch 16/70 | Lo