In [1]:
import os
import numpy as np
import pandas as pd
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from timm import create_model
from sklearn.model_selection import StratifiedKFold
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau
from tqdm import tqdm
from torch.cuda.amp import autocast, GradScaler
import glob

  check_for_updates()


In [2]:
# Locations of the training images and of the CSV listing them.
TRAIN_DIR = "/kaggle/input/dat-301-m-ai-1802-ads-butterfly-classification/train/train"
train_csv_path = "/kaggle/input/dat-301-m-ai-1802-ads-butterfly-classification/Training_set.csv"

train_df = pd.read_csv(train_csv_path)
# Resolve every listed filename to its full path inside TRAIN_DIR.
train_df['filepaths'] = [os.path.join(TRAIN_DIR, name) for name in train_df['filename']]

# Class names are indexed in order of first appearance in the CSV.
labels = train_df['label'].unique()
label_mapping = dict(zip(labels, range(len(labels))))
# Inverse lookup (index -> class name), used when writing predictions.
reverse_label_mapping = dict(enumerate(labels))
# Replace the string labels with their integer class indices.
train_df['label'] = train_df['label'].map(label_mapping)

In [3]:
# Tag every row with the index of the stratified fold in which it is held out.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
train_df["fold"] = -1
fold_splits = skf.split(train_df, train_df["label"])
for fold, (train_idx, val_idx) in enumerate(fold_splits):
    train_df.loc[val_idx, "fold"] = fold

# Fold 0 is the validation split; the remaining folds form the training split.
is_holdout = train_df["fold"] == 0
train_data = train_df[~is_holdout].reset_index(drop=True)
val_data = train_df[is_holdout].reset_index(drop=True)

In [4]:
# Settings shared by the training and validation pipelines.
IMG_SIZE = 384
# Per-channel normalisation constants (the commonly used ImageNet statistics).
NORM_MEAN = (0.485, 0.456, 0.406)
NORM_STD = (0.229, 0.224, 0.225)

# Training pipeline: resize, random flips/rotation/colour jitter, an optional
# random resized crop, random hole dropout, then normalisation and conversion
# to a tensor.
train_augmentations = [
    A.Resize(IMG_SIZE, IMG_SIZE),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.Rotate(limit=30, p=0.8),
    A.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.2, p=0.8),
    A.RandomResizedCrop(IMG_SIZE, IMG_SIZE, scale=(0.8, 1.0), p=0.5),
    A.CoarseDropout(
        max_holes=8,
        max_height=40,
        max_width=40,
        min_holes=2,
        min_height=10,
        min_width=10,
        fill_value=0,
        p=0.5,
    ),
    A.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ToTensorV2(),
]
train_transform = A.Compose(train_augmentations)

# Validation pipeline: no random augmentation, only resize + normalise.
val_steps = [
    A.Resize(IMG_SIZE, IMG_SIZE),
    A.Normalize(mean=NORM_MEAN, std=NORM_STD),
    ToTensorV2(),
]
val_transform = A.Compose(val_steps)

In [5]:
class ButterflyDataset(Dataset):
    """Map-style dataset that yields one image and its label per dataframe row.

    Args:
        dataframe: Table with a "filepaths" column (path of each image file)
            and a "label" column. Rows are addressed by position.
        transform: Optional callable invoked as ``transform(image=array)`` that
            returns a mapping whose "image" entry is the transformed image.
            When omitted (or falsy) the raw RGB array is returned unchanged.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) in the dataframe."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        # Look the row up once rather than once per column.
        row = self.dataframe.iloc[idx]
        # The context manager releases the file handle as soon as the pixels
        # have been copied into the array, instead of leaving it open until
        # the image object happens to be garbage collected.
        with Image.open(row["filepaths"]) as img:
            image = np.array(img.convert("RGB"))
        label = row["label"]
        if self.transform:
            image = self.transform(image=image)["image"]
        return image, label

# Both loaders use the same batch size and worker count; only the training
# split is shuffled.
loader_options = {"batch_size": 16, "num_workers": 4}

train_dataset = ButterflyDataset(dataframe=train_data, transform=train_transform)
val_dataset = ButterflyDataset(dataframe=val_data, transform=val_transform)

train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

In [6]:
# Size the classifier head from label_mapping (the class-name -> index dict
# built during data loading). Its length is the true number of classes no
# matter which other cells have already been executed in this kernel session.
model = create_model("efficientnet_b3", pretrained=True, num_classes=len(label_mapping))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.AdamW(model.parameters(), lr=0.00005, weight_decay=1e-4)
# NOTE(review): two schedulers are attached to the same optimizer, and
# T_max=15 is shorter than num_epochs=50 (a cosine schedule is periodic, so
# the LR would presumably start rising again past epoch 15). Confirm this
# combination is intended.
scheduler_cosine = CosineAnnealingLR(optimizer, T_max=15, eta_min=1e-6)
scheduler_reduce = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3, verbose=True)
# torch.amp.GradScaler replaces the deprecated torch.cuda.amp.GradScaler.
# Loss scaling is only meaningful for CUDA mixed precision, so it is switched
# off explicitly when running on CPU.
scaler = torch.amp.GradScaler("cuda", enabled=device.type == "cuda")

# Training-loop settings and early-stopping state.
num_epochs = 50
best_acc = 0              # best validation accuracy (in %) seen so far
patience = 5              # epochs without improvement tolerated before stopping
early_stop_counter = 0    # consecutive epochs without improvement


model.safetensors:   0%|          | 0.00/49.3M [00:00<?, ?B/s]

  scaler = GradScaler()


In [7]:
# Mixed precision is only used on CUDA; on CPU autocast is a disabled no-op.
use_amp = device.type == "cuda"

# NOTE(review): scheduler_cosine and scheduler_reduce both adjust the learning
# rate of the same optimizer every epoch - confirm that is intended.
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    correct = 0
    total = 0
    # The batch labels are called `targets` so that iterating here does not
    # overwrite the notebook-level `labels` array of class names.
    for images, targets in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images, targets = images.to(device), targets.to(device)
        optimizer.zero_grad()
        # torch.autocast replaces the deprecated torch.cuda.amp.autocast.
        with torch.autocast(device_type=device.type, enabled=use_amp):
            outputs = model(images)
            loss = criterion(outputs, targets)
        # Scale the loss before backward, then let the scaler drive the
        # optimizer step and update its scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        predicted = outputs.argmax(dim=1)
        correct += (predicted == targets).sum().item()
        total += targets.size(0)
    train_acc = 100 * correct / total
    scheduler_cosine.step()

    # ---- validation pass (full precision, no gradients) ----
    model.eval()
    correct = 0
    total = 0
    val_loss = 0.0
    with torch.no_grad():
        for images, targets in val_loader:
            images, targets = images.to(device), targets.to(device)
            outputs = model(images)
            loss = criterion(outputs, targets)
            val_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            correct += (predicted == targets).sum().item()
            total += targets.size(0)
    val_acc = 100 * correct / total
    # Mean of the per-batch losses.
    val_loss /= len(val_loader)

    print(f"Epoch {epoch+1}, Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%, Val Loss: {val_loss:.4f}")

    # The plateau scheduler tracks validation accuracy (constructed with mode='max').
    scheduler_reduce.step(val_acc)

    # Checkpoint on improvement; otherwise count towards early stopping.
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), "best_model.pth")
        early_stop_counter = 0
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            print("Early stopping triggered!")
            break

  with autocast():
Epoch 1/50: 100%|██████████| 250/250 [01:02<00:00,  4.02it/s]


Epoch 1, Train Acc: 20.65%, Val Acc: 61.90%, Val Loss: 2.4346


Epoch 2/50: 100%|██████████| 250/250 [00:59<00:00,  4.20it/s]


Epoch 2, Train Acc: 73.00%, Val Acc: 84.30%, Val Loss: 1.3444


Epoch 3/50: 100%|██████████| 250/250 [01:00<00:00,  4.11it/s]


Epoch 3, Train Acc: 87.33%, Val Acc: 90.40%, Val Loss: 1.1570


Epoch 4/50: 100%|██████████| 250/250 [01:03<00:00,  3.95it/s]


Epoch 4, Train Acc: 91.58%, Val Acc: 92.50%, Val Loss: 1.1017


Epoch 5/50: 100%|██████████| 250/250 [01:03<00:00,  3.92it/s]


Epoch 5, Train Acc: 94.67%, Val Acc: 93.90%, Val Loss: 1.0662


Epoch 6/50: 100%|██████████| 250/250 [01:03<00:00,  3.94it/s]


Epoch 6, Train Acc: 95.47%, Val Acc: 94.00%, Val Loss: 1.0406


Epoch 7/50: 100%|██████████| 250/250 [01:03<00:00,  3.97it/s]


Epoch 7, Train Acc: 96.50%, Val Acc: 94.20%, Val Loss: 1.0262


Epoch 8/50: 100%|██████████| 250/250 [01:02<00:00,  3.99it/s]


Epoch 8, Train Acc: 97.00%, Val Acc: 94.80%, Val Loss: 1.0150


Epoch 9/50: 100%|██████████| 250/250 [01:01<00:00,  4.04it/s]


Epoch 9, Train Acc: 97.80%, Val Acc: 94.90%, Val Loss: 1.0040


Epoch 10/50: 100%|██████████| 250/250 [01:00<00:00,  4.15it/s]


Epoch 10, Train Acc: 98.30%, Val Acc: 94.40%, Val Loss: 1.0013


Epoch 11/50: 100%|██████████| 250/250 [01:00<00:00,  4.15it/s]


Epoch 11, Train Acc: 98.50%, Val Acc: 94.70%, Val Loss: 0.9984


Epoch 12/50: 100%|██████████| 250/250 [01:00<00:00,  4.16it/s]


Epoch 12, Train Acc: 98.50%, Val Acc: 94.80%, Val Loss: 0.9928


Epoch 13/50: 100%|██████████| 250/250 [01:00<00:00,  4.16it/s]


Epoch 13, Train Acc: 98.53%, Val Acc: 94.90%, Val Loss: 0.9907


Epoch 14/50: 100%|██████████| 250/250 [01:00<00:00,  4.15it/s]


Epoch 14, Train Acc: 98.78%, Val Acc: 94.80%, Val Loss: 0.9925
Early stopping triggered!


In [8]:
test_dir = "/kaggle/input/dat-301-m-ai-1802-ads-butterfly-classification/test/test"
# glob returns files in filesystem order; sort so the prediction/submission
# order is the same on every run.
test_files = sorted(glob.glob(os.path.join(test_dir, "*.jpg")))
if not test_files:
    # Fail loudly instead of silently producing an empty submission.
    raise FileNotFoundError(f"No .jpg files found in {test_dir}")
test_df = pd.DataFrame({"filepaths": test_files})
test_df["ID"] = test_df["filepaths"].apply(os.path.basename)

# Test-time augmentation: each call draws a fresh random flip / small rotation.
tta_transform = A.Compose([
    A.Resize(384, 384),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=10, p=0.5),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

# Restore the best checkpoint. map_location places the tensors on the active
# device even if the file was written on a different one; weights_only=True
# restricts unpickling to tensors/containers, which is all a state_dict holds.
best_state = torch.load("best_model.pth", map_location=device, weights_only=True)
model.load_state_dict(best_state)
model.eval()

predictions = []
img_ids = []
num_tta = 5
with torch.no_grad():
    for img_path in tqdm(test_files, desc="Predicting with TTA"):
        image = np.array(Image.open(img_path).convert("RGB"))
        tta_preds = []
        for _ in range(num_tta):
            aug_image = tta_transform(image=image)["image"].unsqueeze(0).to(device)
            output = model(aug_image)
            tta_preds.append(output.softmax(dim=1))
        # Average the class probabilities over the augmented views, then take
        # the most probable class and translate it back to its name.
        avg_pred = torch.stack(tta_preds).mean(dim=0)
        _, predicted = torch.max(avg_pred, 1)
        predictions.append(reverse_label_mapping[predicted.item()])
        img_ids.append(os.path.basename(img_path))

  model.load_state_dict(torch.load("best_model.pth"))
Predicting with TTA: 100%|██████████| 1499/1499 [02:38<00:00,  9.47it/s]


In [9]:
# Pair each test image ID with its predicted class name and write the CSV.
submission_columns = {"ID": img_ids, "label": predictions}
submission_df = pd.DataFrame(submission_columns)
submission_df.to_csv(
    "/kaggle/working/submission.csv",
    index=False,
)

print("✅ Submission file created at /kaggle/working/submission.csv")

✅ Submission file created at /kaggle/working/submission.csv
