In [40]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from torch.utils.data import Dataset
import torchvision.transforms as transforms
import torchvision.transforms.functional as F
import torch
import torch.nn as nn
import torchvision.models as models
from torch.utils.data import DataLoader
from sklearn.metrics import f1_score
from dvclive import Live
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import CosineAnnealingLR

In [41]:
# Class-id -> character-name lookup. Keys are the *stringified* numeric ids
# ("0" .. "17"); the position of each name in the list below is its id.
LABELS_MAPPING = {
    str(class_id): character
    for class_id, character in enumerate([
        "Ace", "Akainu", "Brook", "Chopper", "Crocodile", "Franky",
        "Jinbei", "Kurohige", "Law", "Luffy", "Mihawk", "Nami",
        "Rayleigh", "Robin", "Sanji", "Shanks", "Usopp", "Zoro",
    ])
}

# Filesystem locations used throughout the notebook.
CSV_PATH = "/home/an/spbu_deep_learning/classification/train_annotations.csv"
TRAIN_DATA_PATH = "/home/an/spbu_deep_learning/classification/splitted"
TEST_DATA_PATH = "/home/an/spbu_deep_learning/classification/splitted/test"

In [42]:
def count_images_per_class(csv_path, labels_mapping):
    """Count how many annotated rows belong to each class name.

    Args:
        csv_path: Path of a CSV file that has a ``label`` column.
        labels_mapping: Dict whose keys are label values in string form and
            whose values are the class names to report.

    Returns:
        Dict keyed by every class name in ``labels_mapping`` (in mapping
        order) with the number of rows carrying that label. Rows whose
        stringified label is not a key of ``labels_mapping`` are ignored.
    """
    annotations = pd.read_csv(csv_path)
    per_class = dict.fromkeys(labels_mapping.values(), 0)

    # Stringify first so the lookup matches the mapping's string keys, then
    # tally once per distinct label instead of once per row.
    label_keys = annotations['label'].map(str)
    for key, n_rows in label_keys.value_counts().items():
        character = labels_mapping.get(key)
        if character is None:
            continue
        per_class[character] += int(n_rows)

    return per_class

# Display the per-character image counts for the original annotation file.
count_images_per_class(csv_path=CSV_PATH, labels_mapping=LABELS_MAPPING)

{'Ace': 168,
 'Akainu': 167,
 'Brook': 178,
 'Chopper': 170,
 'Crocodile': 167,
 'Franky': 170,
 'Jinbei': 167,
 'Kurohige': 170,
 'Law': 175,
 'Luffy': 97,
 'Mihawk': 167,
 'Nami': 181,
 'Rayleigh': 167,
 'Robin': 167,
 'Sanji': 135,
 'Shanks': 168,
 'Usopp': 170,
 'Zoro': 132}

In [43]:
def split_train_test(csv_path, labels_mapping, test_size=0.2, random_state=42):
    """Assign every annotated row to a ``'train'`` or ``'val'`` split, class by class.

    The annotation CSV is read, its ``image_path`` values are normalised with
    ``os.path.normpath``, and a ``split`` column is added. For each label id in
    ``labels_mapping`` the rows of that class are divided with
    ``train_test_split`` and the held-out part is marked ``'val'``.

    Args:
        csv_path: Path of a CSV file with ``image_path`` and ``label`` columns.
        labels_mapping: Dict whose keys are label ids convertible with ``int``.
        test_size: Passed through to ``train_test_split`` for every class.
        random_state: Passed through to ``train_test_split`` for every class.

    Returns:
        The annotation DataFrame with normalised ``image_path`` values and a
        ``split`` column containing ``'train'`` or ``'val'``. Rows whose label
        is not in ``labels_mapping`` stay ``'train'``.
    """
    data = pd.read_csv(csv_path)

    data['image_path'] = data['image_path'].apply(os.path.normpath)

    # Everything starts in 'train'; held-out rows are relabelled below.
    data['split'] = 'train'

    for label in labels_mapping:
        class_data = data[data['label'] == int(label)]

        # A class with 0 or 1 rows cannot be divided into two non-empty parts
        # (train_test_split raises ValueError), so it stays entirely in 'train'
        # rather than aborting the split for every other class.
        if len(class_data) < 2:
            continue

        _, held_out = train_test_split(class_data, test_size=test_size, random_state=random_state)

        # The filtered frame keeps the original index, so it addresses `data` directly.
        data.loc[held_out.index, 'split'] = 'val'

    return data

# Destination of the annotation file that carries the extra 'split' column.
NEW_CSV_PATH = "/home/an/spbu_deep_learning/classification/new_annotations.csv"

# Build the per-class train/val assignment and write it to disk.
splitted_data = split_train_test(
    csv_path=CSV_PATH,
    labels_mapping=LABELS_MAPPING,
    test_size=0.2,
)
splitted_data.to_csv(NEW_CSV_PATH, index=False)


In [44]:
class OnePieceDataset(Dataset):
    """Image dataset that serves either annotated or un-annotated images.

    * Annotated mode (``csv_path`` given): samples come from the CSV's
      ``image_path`` and ``label`` columns, optionally restricted to the rows
      whose ``split`` column equals ``split``. Items are ``(image, label)``.
    * Un-annotated mode (``csv_path`` is ``None``): samples are the regular
      files directly inside ``images_dir``, in sorted filename order. Items are
      ``(image, image_name)`` where ``image_name`` is the file name without
      its extension.

    Args:
        images_dir: Directory that image paths are resolved against.
        csv_path: Annotation CSV, or ``None`` for un-annotated mode.
        labels_json: Optional dict kept as ``self.label_map`` when it is a
            dict; it is stored only and not used by this class.
        transform: Optional callable applied to each loaded RGB image.
        split: Optional value to filter the CSV's ``split`` column by.
    """

    def __init__(self, images_dir, csv_path=None, labels_json=None, transform=None, split=None):
        self.images_dir = images_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        # Despite the name, this flag means "annotations are available":
        # it is also True for a dataset built with split="val".
        self.is_train = csv_path is not None

        if self.is_train:
            self.data = pd.read_csv(csv_path)

            if split:
                self.data = self.data[self.data['split'] == split]

            self.label_map = labels_json if isinstance(labels_json, dict) else None

            # Paths in the CSV may use Windows separators; convert them before
            # joining so they resolve on POSIX systems too.
            self.image_paths = [
                os.path.normpath(os.path.join(images_dir, relative_path.replace("\\", "/")))
                for relative_path in self.data['image_path']
            ]
            self.labels = self.data['label'].tolist()

        else:
            # os.listdir returns entries in arbitrary order; sort so sample
            # order is reproducible between runs and machines.
            self.image_paths = sorted(
                os.path.join(images_dir, fname)
                for fname in os.listdir(images_dir)
                if os.path.isfile(os.path.join(images_dir, fname))
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)`` or ``(image, image_name)``."""
        image_path = self.image_paths[idx]
        # convert() returns a new, fully loaded image, so the underlying file
        # can be closed as soon as the conversion is done.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")

        if self.transform:
            image = self.transform(image)

        if self.is_train:
            return image, self.labels[idx]

        image_name = os.path.splitext(os.path.basename(image_path))[0]
        return image, image_name


In [45]:
# Training-time preprocessing: resize, light random augmentation, then
# normalisation with the channel statistics documented for torchvision's
# ImageNet-pretrained models.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Validation preprocessing: identical resize/normalisation but WITHOUT the
# random flip/rotation, so validation metrics are deterministic and measure
# the model on unmodified images.
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Both datasets read the split-annotated CSV written earlier (NEW_CSV_PATH);
# CSV_PATH is left untouched so it keeps pointing at the original annotations.
train_dataset = OnePieceDataset(
    images_dir=TRAIN_DATA_PATH,
    csv_path=NEW_CSV_PATH,
    labels_json=LABELS_MAPPING,
    transform=transform,
    split="train"
)

val_dataset = OnePieceDataset(
    images_dir=TRAIN_DATA_PATH,
    csv_path=NEW_CSV_PATH,
    labels_json=LABELS_MAPPING,
    transform=val_transform,
    split="val"
)

# Sanity check: fraction of annotated images that ended up in validation.
print(len(val_dataset) / (len(train_dataset) + len(val_dataset)))

0.20233196159122085


In [46]:
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import models, transforms
import time

# --- Hyperparameters -------------------------------------------------------
num_epochs = 10
batch_size = 16
learning_rate = 0.0001
weight_decay = 1e-5

# --- Data ------------------------------------------------------------------
# Point CSV_PATH at the original annotation file. No `split` is passed below,
# so every annotated row is used for training in this cell.
CSV_PATH = "/home/an/spbu_deep_learning/classification/train_annotations.csv"

train_dataset = OnePieceDataset(
    images_dir=TRAIN_DATA_PATH,
    csv_path=CSV_PATH,
    labels_json=LABELS_MAPPING,
    transform=transform,
)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# --- Model -----------------------------------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): newer torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument - confirm the installed version before
# switching.
model = models.resnet18(pretrained=True)
# Replace the classification head with one output per character class.
model.fc = nn.Linear(model.fc.in_features, len(LABELS_MAPPING))
model = model.to(device)

# --- Optimisation ----------------------------------------------------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)

# NOTE(review): T_max (12) is larger than num_epochs (10), so the cosine
# schedule does not reach eta_min within this run - confirm this is intended.
scheduler = CosineAnnealingLR(optimizer, T_max=12, eta_min=1e-6)

def train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Module to train; switched to training mode.
        dataloader: Iterable of ``(images, labels)`` batches that supports ``len``.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        ``(train_loss, train_acc)``: the mean of the per-batch losses and the
        accuracy in percent over all samples seen.
    """
    model.train()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for batch_images, batch_labels in dataloader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        # max over dim 1 returns (values, indices); the indices are the predicted classes.
        predicted = logits.max(1)[1]
        n_correct += predicted.eq(batch_labels).sum().item()
        n_seen += batch_labels.size(0)

    return loss_sum / len(dataloader), 100 * n_correct / n_seen

def evaluate(model, dataloader, criterion, device):
    """Measure loss and accuracy of ``model`` on ``dataloader`` without updating it.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        dataloader: Iterable of ``(images, labels)`` batches that supports ``len``.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(val_loss, val_acc)``: the mean of the per-batch losses and the
        accuracy in percent over all samples seen.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch_images, batch_labels in dataloader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            loss_sum += criterion(logits, batch_labels).item()

            # max over dim 1 returns (values, indices); the indices are the predicted classes.
            predicted = logits.max(1)[1]
            n_correct += predicted.eq(batch_labels).sum().item()
            n_seen += batch_labels.size(0)

    return loss_sum / len(dataloader), 100 * n_correct / n_seen

# Train for `num_epochs` passes; the learning-rate schedule advances once per epoch.
for epoch in range(num_epochs):
    start_time = time.time()

    train_loss, train_acc = train_one_epoch(
        model=model,
        dataloader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
    )
    scheduler.step()

    print(f"Epoch {epoch+1}/{num_epochs}")
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%")

    # Wall-clock time for the epoch, including the two prints above.
    epoch_time = time.time() - start_time
    print(f"Epoch Time: {epoch_time:.2f} seconds")

# Save only the learned parameters (state dict), not the whole module object.
torch.save(model.state_dict(), "one_piece_model.pth")




Epoch 1/10
Train Loss: 1.2942, Train Accuracy: 67.39%
Epoch Time: 22.25 seconds
Epoch 2/10
Train Loss: 0.3426, Train Accuracy: 92.49%
Epoch Time: 22.04 seconds
Epoch 3/10
Train Loss: 0.1728, Train Accuracy: 96.12%
Epoch Time: 21.56 seconds
Epoch 4/10
Train Loss: 0.1140, Train Accuracy: 97.87%
Epoch Time: 21.45 seconds
Epoch 5/10
Train Loss: 0.0663, Train Accuracy: 99.11%
Epoch Time: 21.40 seconds
Epoch 6/10
Train Loss: 0.0456, Train Accuracy: 99.52%
Epoch Time: 20.91 seconds
Epoch 7/10
Train Loss: 0.0327, Train Accuracy: 99.66%
Epoch Time: 21.03 seconds
Epoch 8/10
Train Loss: 0.0259, Train Accuracy: 99.83%
Epoch Time: 20.87 seconds
Epoch 9/10
Train Loss: 0.0182, Train Accuracy: 99.90%
Epoch Time: 20.89 seconds
Epoch 10/10
Train Loss: 0.0217, Train Accuracy: 99.90%
Epoch Time: 20.98 seconds


In [47]:
# Deterministic preprocessing for inference: same resize and normalisation as
# training, but WITHOUT the random flip/rotation. Feeding randomly augmented
# images to the model at prediction time makes the submission differ from run
# to run and can lower accuracy.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# No csv_path: the dataset yields (image, image_name) for each file in the folder.
test_dataset = OnePieceDataset(images_dir=TEST_DATA_PATH, transform=test_transform)

# shuffle=False keeps predictions aligned with the dataset's file order.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

model.eval()

predictions = []
image_names = []

with torch.no_grad():
    for images, filenames in test_loader:
        images = images.to(device)

        outputs = model(images)

        # Index of the largest logit per row is the predicted class id.
        _, preds = outputs.max(1)

        predictions.extend(preds.cpu().tolist())
        image_names.extend(filenames)

# One row per test image: file name without extension and predicted class id.
predictions_df = pd.DataFrame({
    'id': image_names,
    'label': predictions
})

predictions_df.to_csv('submission.csv', index=False)
