In [4]:
# IMPORTS

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from pathlib import Path
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import v2
from torchvision import models


# DATA SETUP

DATA_DIR = Path("/kaggle/input/feathers-in-focus-model/aml-2025-feathers-in-focus")

# Per-channel statistics handed to v2.Normalize by both pipelines below
# (these are the values commonly used for ImageNet-style normalization).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: random crop/flip/rotation/colour jitter for augmentation,
# followed by conversion to scaled float32 and normalization.
TRANSFORM_TRAIN = v2.Compose(
    [
        v2.ToImage(),
        v2.RandomResizedCrop(224, scale=(0.6, 1.0)),
        v2.RandomHorizontalFlip(p=0.5),
        v2.RandomRotation(degrees=10),
        v2.ColorJitter(brightness=0.2, contrast=0.2),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ]
)

# Validation pipeline: deterministic resize only, then the same dtype
# conversion and normalization as the training pipeline.
TRANSFORM_VAL = v2.Compose(
    [
        v2.ToImage(),
        v2.Resize((224, 224)),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
    ]
)

# Load the attribute matrix from disk and keep it as a float32 tensor.
# It is later indexed by zero-based class label (see BirdDataset.__getitem__).
attributes = torch.tensor(
    np.load(DATA_DIR / "attributes.npy"),
    dtype=torch.float32,
)
print(f"Attributes shape: {attributes.shape}")


# DATASET

class BirdDataset(Dataset):
    """Dataset that loads images listed in a DataFrame from a single folder.

    Each row of ``df`` must have an ``image_path`` column; only the basename
    of that path is used and it is resolved relative to ``search_root``.
    Unless ``test_mode`` is set, rows must also have a 1-based ``label``
    column, which is converted to a 0-based index and used to look up the
    matching row of ``attributes``.
    """

    def __init__(self, df, search_root, transform, attributes, test_mode=False):
        """Store the configuration; no image is read until indexing.

        Args:
            df: DataFrame with an ``image_path`` column (and ``label`` unless
                ``test_mode`` is true). Its index is reset to 0..n-1.
            search_root: Directory (``Path``) that contains the image files.
            transform: Callable applied to the decoded RGB PIL image.
            attributes: Indexable by 0-based label; the selected entry is
                returned alongside each training sample.
            test_mode: When true, ``__getitem__`` returns only the image.
        """
        self.df = df.reset_index(drop=True)
        self.search_root = search_root
        self.transform = transform
        self.attributes = attributes
        self.test_mode = test_mode

    def __len__(self):
        """Return the number of rows (samples) in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load and transform the sample at position ``idx``.

        Returns:
            The transformed image alone when ``test_mode`` is true, otherwise
            a tuple ``(image, label, attrs)`` where ``label`` is the 0-based
            class index and ``attrs`` is ``attributes[label]``.
        """
        row = self.df.iloc[idx]
        # Path.name is the final path component and never contains a
        # separator, so it can be joined onto the search root directly.
        path = self.search_root / Path(str(row["image_path"])).name

        # convert() decodes the pixels into a new image object, so the file
        # handle can be closed immediately instead of lingering until GC.
        with Image.open(path) as raw:
            image = raw.convert("RGB")
        image = self.transform(image)

        if self.test_mode:
            return image

        # Labels in the CSV are 1-based; shift to 0-based for indexing/loss.
        label = int(row["label"]) - 1
        return image, label, self.attributes[label]


# DATA LOADERS

train_df = pd.read_csv(DATA_DIR / "train_images.csv")

# Seeded random permutation of row positions; the first 80% of the permuted
# positions form the training split and the remainder the validation split.
torch.manual_seed(42)
indices = torch.randperm(len(train_df)).tolist()
train_size = int(0.8 * len(train_df))

_train_positions = indices[:train_size]
_val_positions = indices[train_size:]
_train_image_root = DATA_DIR / "train_images"

# Both splits read from the same image folder; only the transform differs
# (augmenting pipeline for training, deterministic pipeline for validation).
train_dataset = BirdDataset(
    train_df.iloc[_train_positions].reset_index(drop=True),
    _train_image_root,
    TRANSFORM_TRAIN,
    attributes,
)
val_dataset = BirdDataset(
    train_df.iloc[_val_positions].reset_index(drop=True),
    _train_image_root,
    TRANSFORM_VAL,
    attributes,
)

# Training batches are reshuffled every epoch; validation order is fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)


# MODEL - ResNet34 without pretrained weights

class Net(nn.Module):
    """ResNet34 backbone with a classification head and an attribute head.

    The backbone is created without pretrained weights and its original
    classifier is replaced by an identity, so it outputs pooled features.
    Dropout is applied to those features before both linear heads.
    """

    def __init__(self, num_classes=200, num_attributes=312, dropout_p=0.3):
        """Build the backbone and the two heads.

        Args:
            num_classes: Output size of the classification head.
            num_attributes: Output size of the attribute head.
            dropout_p: Dropout probability applied to the backbone features.

        The defaults reproduce the original fixed configuration
        (200 classes, 312 attributes, dropout 0.3).
        """
        super().__init__()

        # ResNet34 without pretrained weights
        self.backbone = models.resnet34(weights=None)
        num_features = self.backbone.fc.in_features  # 512
        self.backbone.fc = nn.Identity()  # Remove the original classifier

        self.dropout = nn.Dropout(dropout_p)

        # Classification head
        self.fc = nn.Linear(num_features, num_classes)

        # Attribute head
        self.fc_attr = nn.Linear(num_features, num_attributes)

    def forward(self, x):
        """Run the backbone and both heads on a batch of images.

        Returns:
            A tuple ``(class_out, attr_out)``: ``class_out`` holds
            log-probabilities (log-softmax over dim 1) from the
            classification head, and ``attr_out`` holds sigmoid activations
            from the attribute head.
        """
        features = self.backbone(x)
        features = self.dropout(features)

        # Log-probabilities pair with F.nll_loss; sigmoid outputs pair with
        # F.binary_cross_entropy in the training loop.
        class_out = F.log_softmax(self.fc(features), dim=1)
        attr_out = torch.sigmoid(self.fc_attr(features))

        return class_out, attr_out


# TRAINING

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device: {device}")

model = Net().to(device)
print(f"Parameters: {sum(p.numel() for p in model.parameters()):,}")

EPOCHS = 70
ATTR_WEIGHT = 0.5  # weight of the attribute loss relative to the class loss

optimizer = optim.AdamW(model.parameters(), lr=0.0003, weight_decay=1e-4)
# Tie the cosine schedule length to EPOCHS so that changing the number of
# epochs cannot leave the learning-rate schedule out of sync with training.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

best_acc = 0.0

for epoch in range(1, EPOCHS + 1):
    # ---- training pass ----
    model.train()
    total_loss = 0  # sum of per-batch losses over the epoch (not averaged)
    correct = 0

    for images, labels, target_attrs in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        target_attrs = target_attrs.to(device)

        optimizer.zero_grad()
        class_out, attr_out = model(images)

        # class_out is log-probabilities, attr_out is sigmoid activations,
        # so NLL and plain BCE are the matching losses.
        loss_class = F.nll_loss(class_out, labels)
        loss_attr = F.binary_cross_entropy(attr_out, target_attrs)
        loss = loss_class + ATTR_WEIGHT * loss_attr

        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        correct += (class_out.argmax(1) == labels).sum().item()

    train_acc = correct / len(train_loader.dataset)

    # ---- validation pass (classification accuracy only) ----
    model.eval()
    val_correct = 0
    with torch.no_grad():
        for images, labels, _ in val_loader:
            images, labels = images.to(device), labels.to(device)
            class_out, _ = model(images)
            val_correct += (class_out.argmax(1) == labels).sum().item()

    val_acc = val_correct / len(val_loader.dataset)

    print(f"Epoch {epoch}/{EPOCHS} | Loss: {total_loss:.1f} | Train: {train_acc:.4f} | Val: {val_acc:.4f}")

    # Keep only the checkpoint with the best validation accuracy so far.
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), "bird_cnn.pt")
        print("  ↳ Saved!")

    # Step the learning-rate schedule once per epoch.
    scheduler.step()

print(f"\nBest: {best_acc:.4f}")


# SUBMISSION WITH TTA (test-time augmentation)

# Reload the best checkpoint saved during training. map_location lets the
# checkpoint load even when the current device differs from the one it was
# saved on (e.g. saved on GPU, loaded on CPU).
model.load_state_dict(torch.load("bird_cnn.pt", map_location=device))
model.eval()

test_df = pd.read_csv(DATA_DIR / "test_images_path.csv")

# Two deterministic views per test image: the plain resized image and its
# horizontal mirror (RandomHorizontalFlip with p=1.0 always flips).
TTA_TRANSFORMS = [
    v2.Compose([
        v2.ToImage(),
        v2.Resize((224, 224)),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
    v2.Compose([
        v2.ToImage(),
        v2.Resize((224, 224)),
        v2.RandomHorizontalFlip(p=1.0),
        v2.ToDtype(torch.float32, scale=True),
        v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]),
]

test_image_root = DATA_DIR / "test_images"

predictions = []
with torch.no_grad():
    for image_path in test_df["image_path"]:
        # Only the basename from the CSV is used; it never contains a
        # path separator, so it joins directly onto the image folder.
        path = test_image_root / Path(str(image_path)).name

        # convert() decodes into a new image, so the file can be closed
        # right away instead of staying open until garbage collection.
        with Image.open(path) as raw:
            image = raw.convert("RGB")

        # The model's class output is log-probabilities; the TTA views are
        # combined by averaging those log-probabilities.
        all_outputs = []
        for transform in TTA_TRANSFORMS:
            img_t = transform(image).unsqueeze(0).to(device)
            output, _ = model(img_t)
            all_outputs.append(output)

        avg_output = torch.stack(all_outputs).mean(0)
        # Shift back to the 1-based labels used in the CSV files.
        pred = avg_output.argmax(1).item() + 1
        predictions.append(pred)

submission = pd.DataFrame({"id": test_df["id"].values, "label": predictions})
submission.to_csv("submission.csv", index=False)
print(submission.head())
print("Done!")

Attributes shape: torch.Size([200, 312])
Device: cuda
Parameters: 21,547,328
Epoch 1/70 | Loss: 542.8 | Train: 0.0089 | Val: 0.0153
  ↳ Saved!
Epoch 2/70 | Loss: 507.5 | Train: 0.0226 | Val: 0.0229
  ↳ Saved!
Epoch 3/70 | Loss: 489.8 | Train: 0.0338 | Val: 0.0242
  ↳ Saved!
Epoch 4/70 | Loss: 471.9 | Train: 0.0475 | Val: 0.0394
  ↳ Saved!
Epoch 5/70 | Loss: 455.5 | Train: 0.0510 | Val: 0.0496
  ↳ Saved!
Epoch 6/70 | Loss: 441.2 | Train: 0.0650 | Val: 0.0725
  ↳ Saved!
Epoch 7/70 | Loss: 428.7 | Train: 0.0806 | Val: 0.0623
Epoch 8/70 | Loss: 413.2 | Train: 0.0901 | Val: 0.0649
Epoch 9/70 | Loss: 403.5 | Train: 0.1038 | Val: 0.0776
  ↳ Saved!
Epoch 10/70 | Loss: 384.8 | Train: 0.1217 | Val: 0.1107
  ↳ Saved!
Epoch 11/70 | Loss: 378.0 | Train: 0.1239 | Val: 0.1056
Epoch 12/70 | Loss: 365.6 | Train: 0.1484 | Val: 0.0840
Epoch 13/70 | Loss: 358.5 | Train: 0.1557 | Val: 0.1298
  ↳ Saved!
Epoch 14/70 | Loss: 344.4 | Train: 0.1701 | Val: 0.1705
  ↳ Saved!
Epoch 15/70 | Loss: 335.0 | Train: 0.1