In [1]:
import pandas as pd
import numpy as np
import os
import json
import shutil

caption_dir = '/kaggle/input/tagged-anime-illustrations/danbooru-metadata/danbooru-metadata'

id_to_tags = {}

for filename in os.listdir(caption_dir):
    f_path = os.path.join(caption_dir, filename)
    if os.path.isfile(f_path):
        with open(f_path, 'r') as f:
            for line in f:
                data = json.loads(line.rstrip())
                tags = [x["name"] for x in data["tags"]]
                caption = " ".join(tags)
                id_to_tags[data["id"]] = caption 

In [2]:
from collections import Counter

tag_counter = Counter()
for tags in id_to_tags.values():
    for tag in tags.split(" "):
        tag = tag.strip()
        if tag:  # avoid empty strings
            tag_counter[tag] += 1
            
top_tags = [tag for tag, _ in tag_counter.most_common(2000)]

top_tags_set = set(top_tags)

top_2000_id_to_tags = {
    img_id: " ".join([t for t in tags.split(" ") if t in top_tags_set])
    for img_id, tags in id_to_tags.items()
}

In [3]:
from collections import Counter

filtered_tag_counter_2000 = Counter()

for tags in top_2000_id_to_tags.values():
    for tag in tags.split(" "):
        tag = tag.strip()
        if tag:  
            filtered_tag_counter_2000[tag] += 1

print("Total unique tags:", len(filtered_tag_counter_2000))

Total unique tags: 2000


In [4]:
print(top_2000_id_to_tags["1017000"])

1girl bow brown_hair detached_sleeves frilled_skirt frills hair_bow hair_ribbon hair_tubes hakurei_reimu highres midriff navel ofuda red_eyes red_skirt ribbon sarashi skirt skirt_set solo standing touhou yin_yang


In [5]:
unique_tags = list(filtered_tag_counter_2000.keys())

In [6]:
import os
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class DanbooruMultiLabelDataset(Dataset):
    def __init__(self, root_dir, label_dict, unique_tags, transform=None):
        """
        root_dir: folder with all subfolders of images
        label_dict: dict mapping 'image_id' -> list of tags
        unique_tags: list of all unique tags (defines the multi-label space)
        """
        self.root_dir = root_dir
        self.label_dict = label_dict
        self.tag_to_idx = {tag: i for i, tag in enumerate(unique_tags)}
        self.transform = transform

        # Collect image paths
        self.image_paths = []
        for subdir, _, files in os.walk(root_dir):
            for f in files:
                if f.lower().endswith((".jpg", ".jpeg", ".png")):
                    img_id = os.path.splitext(f)[0]
                    if img_id in label_dict:
                        self.image_paths.append(os.path.join(subdir, f))

    def __len__(self):
        return len(self.image_paths)

    def encode_tags(self, tags):
        vec = torch.zeros(len(self.tag_to_idx), dtype=torch.float32)
        for tag in tags:
            if tag in self.tag_to_idx:
                vec[self.tag_to_idx[tag]] = 1.0
        return vec

    def __getitem__(self, idx):
        path = self.image_paths[idx]
        img = Image.open(path).convert("RGB")

        img_id = os.path.splitext(os.path.basename(path))[0]
        tags = self.label_dict[img_id]
        label_vec = self.encode_tags(tags)

        if self.transform:
            img = self.transform(img)

        return img, label_vec

In [7]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

dataset = DanbooruMultiLabelDataset(
    root_dir="/kaggle/input/tagged-anime-illustrations/danbooru-images/danbooru-images",
    label_dict=top_2000_id_to_tags,
    unique_tags=unique_tags,
    transform=transform
)

dataloader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)

for images, label_vecs in dataloader:
    print(images.shape)      # (B, 3, 224, 224)
    print(label_vecs.shape)  # (B, num_tags)
    break

torch.Size([16, 3, 224, 224])
torch.Size([16, 2000])


In [8]:
from torch.utils.data import Subset
from sklearn.model_selection import train_test_split

RANDOM_SEED = 42

n = len(dataset)
half = n // 2

# ✅ Split into ordered halves first
first_half_idx = list(range(0, half))
second_half_idx = list(range(half, n))

first_half = Subset(dataset, first_half_idx)
second_half = Subset(dataset, second_half_idx)

print(f"First half: {len(first_half)} samples")
print(f"Second half: {len(second_half)} samples")

First half: 168516 samples
Second half: 168517 samples


In [9]:
def split_80_10_10(subset, seed):
    n = len(subset)
    indices = list(range(n))
    train_idx, temp_idx = train_test_split(
        indices, test_size=0.2, random_state=seed, shuffle=True
    )
    val_idx, test_idx = train_test_split(
        temp_idx, test_size=0.5, random_state=seed, shuffle=True
    )
    return (
        Subset(subset, train_idx),
        Subset(subset, val_idx),
        Subset(subset, test_idx),
    )

first_train, first_val, first_test = split_80_10_10(first_half, seed=RANDOM_SEED)
second_train, second_val, second_test = split_80_10_10(second_half, seed=RANDOM_SEED + 1)

In [10]:
def make_loader(subset, batch_size=64, shuffle=True):
    return DataLoader(
        subset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=4,
        pin_memory=True,
        persistent_workers=True
    )

train_loader_1 = make_loader(first_train)
val_loader_1   = make_loader(first_val, shuffle=False)
test_loader_1  = make_loader(first_test, shuffle=False)

train_loader_2 = make_loader(second_train)
val_loader_2   = make_loader(second_val, shuffle=False)
test_loader_2  = make_loader(second_test, shuffle=False)

In [11]:
import torch
# Load the model
model = torch.hub.load('RF5/danbooru-pretrained', 'resnet50')

Downloading: "https://github.com/RF5/danbooru-pretrained/zipball/master" to /root/.cache/torch/hub/master.zip
Downloading: "https://github.com/RF5/danbooru-pretrained/releases/download/v0.1/resnet50-13306192.pth" to /root/.cache/torch/hub/checkpoints/resnet50-13306192.pth
100%|██████████| 110M/110M [00:00<00:00, 175MB/s]  


In [12]:
print(model)

Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d(64, 256

In [13]:
import torch.nn as nn
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_tags = len(unique_tags)
# Replace final layer
model[1][8] = nn.Linear(in_features=512, out_features=num_tags)
print(f"✅ Replaced final layer with Linear(512, {num_tags})")
model.to(device)

✅ Replaced final layer with Linear(512, 2000)


Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          (0): Conv2d(64, 256

In [14]:
# Load previous fine-tuned weights
checkpoint_path = "/kaggle/input/anime-tagging-data-preprocess-and-trainning/model_half1_finetuned.pth"
model.load_state_dict(torch.load(checkpoint_path, map_location=device))


<All keys matched successfully>

In [15]:
# Freeze feature extractor
for param in model[0].parameters():
    param.requires_grad = False

In [16]:
import torch.nn as nn
import torch.optim as optim
criterion = nn.BCEWithLogitsLoss()  # multi-label classification
optimizer = optim.Adam(model[1].parameters(), lr=1e-3)

In [17]:
from tqdm import tqdm
from sklearn.metrics import f1_score, precision_score, recall_score


num_epochs_head = 5
for epoch in range(num_epochs_head):
    model.train()
    train_loss = 0.0

    for imgs, labels in tqdm(train_loader_2, desc=f"Epoch {epoch+1}/{num_epochs_head} [Head]"):
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels.float())
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * imgs.size(0)

    train_loss /= len(train_loader_2.dataset)

    # Validation
    model.eval()
    all_preds, all_labels = [], []
    val_loss = 0.0
    with torch.no_grad():
        for imgs, labels in val_loader_2:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels.float())
            val_loss += loss.item() * imgs.size(0)

            preds = (torch.sigmoid(outputs) > 0.5).int().cpu()
            all_preds.append(preds)
            all_labels.append(labels.cpu())

    all_preds = torch.cat(all_preds)
    all_labels = torch.cat(all_labels)
    f1 = f1_score(all_labels, all_preds, average="micro")
    precision = precision_score(all_labels, all_preds, average="micro")
    recall = recall_score(all_labels, all_preds, average="micro")
    val_loss /= len(val_loader_2.dataset)

    # print(f"Epoch [{epoch+1}/{num_epochs_head}] - Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | F1: {f1:.4f}"|)

    print(f"Epoch [{epoch+1}/{num_epochs_head}] - Train Loss: {train_loss:.4f} | "
          f"Val Loss: {val_loss:.4f} | F1: {f1:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")

# #Unfreeze backbone for fine-tuning

for param in model[0].parameters():
    param.requires_grad = True

optimizer = optim.Adam(model.parameters(), lr=1e-5)
num_epochs_finetune = 3


patience = 1  
best_f1 = 0.0
patience_counter = 0 
for epoch in range(num_epochs_finetune):
    model.train()
    train_loss = 0.0

    for imgs, labels in tqdm(train_loader_2, desc=f"Epoch {epoch+1}/{num_epochs_finetune} [Fine-tune]"):
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels.float())
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * imgs.size(0)

    train_loss /= len(train_loader_2.dataset)
    
    # Validation
    model.eval()
    all_preds, all_labels = [], []
    val_loss = 0.0
    with torch.no_grad():
        for imgs, labels in val_loader_2:
            imgs, labels = imgs.to(device), labels.to(device)
            outputs = model(imgs)
            loss = criterion(outputs, labels.float())
            val_loss += loss.item() * imgs.size(0)

            preds = (torch.sigmoid(outputs) > 0.5).int().cpu()
            all_preds.append(preds)
            all_labels.append(labels.cpu())

    all_preds = torch.cat(all_preds)
    all_labels = torch.cat(all_labels)
    f1 = f1_score(all_labels, all_preds, average="micro")
    precision = precision_score(all_labels, all_preds, average="micro")
    recall = recall_score(all_labels, all_preds, average="micro")
    val_loss /= len(val_loader_2.dataset)

    # print(f"Epoch [{epoch+1}/{num_epochs_head}] - Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | F1: {f1:.4f}"|)

    print(f"Epoch [{epoch+1}/{num_epochs_finetune}] - Train Loss: {train_loss:.4f} | "
          f"Val Loss: {val_loss:.4f} | F1: {f1:.4f} | Precision: {precision:.4f} | Recall: {recall:.4f}")
    
        
    if f1 > best_f1:
        best_f1 = f1
        patience_counter = 0
    else:
        patience_counter += 1
        print(f" No improvement in F1. Patience: {patience_counter}/{patience}")
        if patience_counter >= patience:
            print("Early stopping triggered.")
            break
            

model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for imgs, labels in tqdm(test_loader_2, desc="Testing"):
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model(imgs)
        preds = (torch.sigmoid(outputs) > 0.5).int().cpu()
        all_preds.append(preds)
        all_labels.append(labels.cpu())

all_preds = torch.cat(all_preds)
all_labels = torch.cat(all_labels)


precision = precision_score(all_labels, all_preds, average="micro")
recall = recall_score(all_labels, all_preds, average="micro")
f1 = f1_score(all_labels, all_preds, average="micro")
print(f"✅ Test F1 Score after second half: {f1:.4f}")
print(f"Precision: {precision:.4f}, Recall: {recall:.4f}")



torch.save(model.state_dict(), "model_half2_finetuned.pth")
print("✅ Model saved after second half training")

Epoch 1/5 [Head]: 100%|██████████| 2107/2107 [09:45<00:00,  3.60it/s]


Epoch [1/5] - Train Loss: 0.0003 | Val Loss: 0.0019 | F1: 0.6163 | Precision: 0.4705 | Recall: 0.8929


Epoch 2/5 [Head]: 100%|██████████| 2107/2107 [07:44<00:00,  4.54it/s]


Epoch [2/5] - Train Loss: 0.0003 | Val Loss: 0.0024 | F1: 0.6163 | Precision: 0.4834 | Recall: 0.8499


Epoch 3/5 [Head]: 100%|██████████| 2107/2107 [07:46<00:00,  4.51it/s]


Epoch [3/5] - Train Loss: 0.0003 | Val Loss: 0.0021 | F1: 0.6714 | Precision: 0.5524 | Recall: 0.8556


Epoch 4/5 [Head]: 100%|██████████| 2107/2107 [07:45<00:00,  4.53it/s]


Epoch [4/5] - Train Loss: 0.0003 | Val Loss: 0.0003 | F1: 0.8035 | Precision: 0.7167 | Recall: 0.9141


Epoch 5/5 [Head]: 100%|██████████| 2107/2107 [07:44<00:00,  4.54it/s]


Epoch [5/5] - Train Loss: 0.0003 | Val Loss: 0.0004 | F1: 0.7802 | Precision: 0.7073 | Recall: 0.8697


Epoch 1/3 [Fine-tune]: 100%|██████████| 2107/2107 [23:32<00:00,  1.49it/s]


Epoch [1/3] - Train Loss: 0.0003 | Val Loss: 0.0003 | F1: 0.8030 | Precision: 0.7425 | Recall: 0.8741


Epoch 2/3 [Fine-tune]: 100%|██████████| 2107/2107 [23:35<00:00,  1.49it/s]


Epoch [2/3] - Train Loss: 0.0003 | Val Loss: 0.0003 | F1: 0.7958 | Precision: 0.7437 | Recall: 0.8556
 No improvement in F1. Patience: 1/1
Early stopping triggered.


Testing: 100%|██████████| 264/264 [01:15<00:00,  3.48it/s]


✅ Test F1 Score after second half: 0.7959
Precision: 0.7445, Recall: 0.8549
✅ Model saved after second half training
