In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models
from torch.utils.data import DataLoader, random_split, ConcatDataset

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [19]:
torch.__version__

'2.4.1+cu118'

In [21]:
transform_original = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

transform_augmented = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

original_dataset = datasets.ImageFolder(root='splitted/train', transform=transform_original)
augmented_dataset = datasets.ImageFolder(root='splitted/train', transform=transform_augmented)

full_dataset = ConcatDataset([original_dataset, augmented_dataset])

train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

val_dataset.dataset.transform = transform_original

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

In [24]:
len(train_loader)*32

4672

In [27]:
from torchvision.models import ResNet18_Weights

model = models.resnet18(weights=ResNet18_Weights.DEFAULT)


num_features = model.fc.in_features
model.fc = nn.Linear(num_features, len(original_dataset.classes))
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

def train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0
        
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct_train += (preds == labels).sum().item()
            total_train += labels.size(0)
        
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = correct_train / total_train
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")
        
        # Валидация модели
        model.eval()
        val_loss = 0.0
        correct_val = 0
        total_val = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                correct_val += (preds == labels).sum().item()
                total_val += labels.size(0)
        
        val_loss /= len(val_loader.dataset)
        val_acc = correct_val / total_val
        print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}")
    
    return model

In [28]:
trained_model = train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs=30)

Epoch [1/30], Loss: 1.3185, Accuracy: 0.6614
Validation Loss: 0.6167, Validation Accuracy: 0.8413
Epoch [2/30], Loss: 0.4386, Accuracy: 0.8913
Validation Loss: 0.4222, Validation Accuracy: 0.8877
Epoch [3/30], Loss: 0.3101, Accuracy: 0.9198
Validation Loss: 0.3545, Validation Accuracy: 0.9108
Epoch [4/30], Loss: 0.2545, Accuracy: 0.9318
Validation Loss: 0.3101, Validation Accuracy: 0.9134
Epoch [5/30], Loss: 0.2401, Accuracy: 0.9325
Validation Loss: 0.2958, Validation Accuracy: 0.9160
Epoch [6/30], Loss: 0.2000, Accuracy: 0.9460
Validation Loss: 0.2878, Validation Accuracy: 0.9151
Epoch [7/30], Loss: 0.1929, Accuracy: 0.9468
Validation Loss: 0.2726, Validation Accuracy: 0.9245
Epoch [8/30], Loss: 0.1791, Accuracy: 0.9554
Validation Loss: 0.2940, Validation Accuracy: 0.9151
Epoch [9/30], Loss: 0.1830, Accuracy: 0.9485
Validation Loss: 0.2568, Validation Accuracy: 0.9357
Epoch [10/30], Loss: 0.1585, Accuracy: 0.9550
Validation Loss: 0.2512, Validation Accuracy: 0.9271
Epoch [11/30], Loss

In [29]:
torch.save(trained_model.state_dict(), 'resnet18_finetuned1.pth')

In [31]:
import os
import pandas as pd
from PIL import Image

transform_predict = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
predict_folder = 'splitted/test'

submission_path = 'submission.csv'
submission_df = pd.read_csv(submission_path)

image_extensions = ['.jpg', '.jpeg', '.png']

with torch.no_grad():
    for idx, row in submission_df.iterrows():
        image_id = row['id']
        img_path = None
        
        for ext in image_extensions:
            potential_path = os.path.join(predict_folder, f"{image_id}{ext}")
            if os.path.exists(potential_path):
                img_path = potential_path
                break
        
        if img_path:
            image = Image.open(img_path).convert('RGB')
            image = transform_predict(image).unsqueeze(0).to(device)

            outputs = model(image)
            _, predicted = torch.max(outputs, 1)
            predicted_label = datasets.ImageFolder(root='splitted/train').classes[predicted.item()]

            submission_df.at[idx, 'label'] = predicted_label
        else:
            print(f"Image {image_id} not found in {predict_folder} with supported extensions")

label_dict = {
    "Ace": 0,
    "Akainu": 1,
    "Brook": 2,
    "Chopper": 3,
    "Crocodile": 4,
    "Franky": 5,
    "Jinbei": 6,
    "Kurohige": 7,
    "Law": 8,
    "Luffy": 9,
    "Mihawk": 10,
    "Nami": 11,
    "Rayleigh": 12,
    "Robin": 13,
    "Sanji": 14,
    "Shanks": 15,
    "Usopp": 16,
    "Zoro": 17
}

submission_df['label'] = submission_df['label'].map(label_dict)

submission_df.to_csv(submission_path, index=False)




In [32]:
transform_augmented_v2 = transforms.Compose([
    transforms.RandomResizedCrop(224),  # Случайное изменение размера и обрезка
    transforms.RandomHorizontalFlip(),  # Случайное горизонтальное отражение
    transforms.RandomRotation(30),  # Случайное вращение изображений на угол до 30 градусов
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),  # Случайное смещение изображений
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Изменение яркости, контраста, насыщенности и оттенка
    transforms.GaussianBlur(kernel_size=5),  # Добавление случайного гауссова размытия
    transforms.RandomPerspective(distortion_scale=0.5, p=0.3, interpolation=3),  # Случайная перспектива с определенной вероятностью
    transforms.ToTensor(),  # Преобразование в тензор
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Нормализация
])
augmented_dataset_v2 = datasets.ImageFolder(root='splitted/train', transform=transform_augmented_v2)
full_dataset = ConcatDataset([original_dataset, augmented_dataset, augmented_dataset_v2])

train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

val_dataset.dataset.transform = transform_original

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

In [33]:
model1 = models.resnet18(weights=ResNet18_Weights.DEFAULT)


num_features = model1.fc.in_features
model1.fc = nn.Linear(num_features, len(original_dataset.classes))
model1 = model1.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model1.parameters(), lr=1e-4)

In [34]:
trained_model = train_model(model1, criterion, optimizer, train_loader, val_loader, num_epochs=20)

Epoch [1/20], Loss: 1.3264, Accuracy: 0.6355
Validation Loss: 0.6272, Validation Accuracy: 0.8245
Epoch [2/20], Loss: 0.5822, Accuracy: 0.8395
Validation Loss: 0.4753, Validation Accuracy: 0.8708
Epoch [3/20], Loss: 0.4526, Accuracy: 0.8675
Validation Loss: 0.4304, Validation Accuracy: 0.8759
Epoch [4/20], Loss: 0.3856, Accuracy: 0.8919
Validation Loss: 0.3670, Validation Accuracy: 0.8925
Epoch [5/20], Loss: 0.3470, Accuracy: 0.9032
Validation Loss: 0.3650, Validation Accuracy: 0.8959
Epoch [6/20], Loss: 0.3203, Accuracy: 0.9078
Validation Loss: 0.3246, Validation Accuracy: 0.8982
Epoch [7/20], Loss: 0.2865, Accuracy: 0.9168
Validation Loss: 0.3106, Validation Accuracy: 0.9085
Epoch [8/20], Loss: 0.2754, Accuracy: 0.9195
Validation Loss: 0.3002, Validation Accuracy: 0.9160
Epoch [9/20], Loss: 0.2589, Accuracy: 0.9244
Validation Loss: 0.3365, Validation Accuracy: 0.9017
Epoch [10/20], Loss: 0.2461, Accuracy: 0.9291
Validation Loss: 0.2644, Validation Accuracy: 0.9217
Epoch [11/20], Loss

In [36]:
transform_predict = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

predict_folder = 'splitted/test'

submission_path = 'submission.csv'
submission_df = pd.read_csv(submission_path)

image_extensions = ['.jpg', '.jpeg', '.png']

with torch.no_grad():
    for idx, row in submission_df.iterrows():
        image_id = row['id']
        img_path = None
        
        for ext in image_extensions:
            potential_path = os.path.join(predict_folder, f"{image_id}{ext}")
            if os.path.exists(potential_path):
                img_path = potential_path
                break
        
        if img_path:
            image = Image.open(img_path).convert('RGB')
            image = transform_predict(image).unsqueeze(0).to(device)

            outputs = model1(image)
            _, predicted = torch.max(outputs, 1)
            predicted_label = datasets.ImageFolder(root='splitted/train').classes[predicted.item()]

            submission_df.at[idx, 'label'] = predicted_label
        else:
            print(f"Image {image_id} not found in {predict_folder} with supported extensions")

label_dict = {
    "Ace": 0,
    "Akainu": 1,
    "Brook": 2,
    "Chopper": 3,
    "Crocodile": 4,
    "Franky": 5,
    "Jinbei": 6,
    "Kurohige": 7,
    "Law": 8,
    "Luffy": 9,
    "Mihawk": 10,
    "Nami": 11,
    "Rayleigh": 12,
    "Robin": 13,
    "Sanji": 14,
    "Shanks": 15,
    "Usopp": 16,
    "Zoro": 17
}

submission_df['label'] = submission_df['label'].map(label_dict)

submission_df.to_csv(submission_path, index=False)

  submission_df.at[idx, 'label'] = predicted_label


In [37]:
torch.save(trained_model.state_dict(), 'resnet18_finetuned2.pth')

In [39]:
from torchvision.models import ResNet50_Weights

model2 = models.resnet50(weights=ResNet50_Weights.DEFAULT)


num_features = model2.fc.in_features
model2.fc = nn.Linear(num_features, len(original_dataset.classes))
model2 = model2.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model2.parameters(), lr=1e-4)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to C:\Users\vitya/.cache\torch\hub\checkpoints\resnet50-11ad3fa6.pth
100.0%


In [41]:
trained_model = train_model(model2, criterion, optimizer, train_loader, val_loader, num_epochs=10)

Epoch [1/10], Loss: 0.3605, Accuracy: 0.8992
Validation Loss: 0.3117, Validation Accuracy: 0.9097
Epoch [2/10], Loss: 0.2904, Accuracy: 0.9165
Validation Loss: 0.2680, Validation Accuracy: 0.9228
Epoch [3/10], Loss: 0.2475, Accuracy: 0.9274
Validation Loss: 0.2438, Validation Accuracy: 0.9251
Epoch [4/10], Loss: 0.2350, Accuracy: 0.9308
Validation Loss: 0.2389, Validation Accuracy: 0.9302
Epoch [5/10], Loss: 0.2231, Accuracy: 0.9352
Validation Loss: 0.2152, Validation Accuracy: 0.9400
Epoch [6/10], Loss: 0.2145, Accuracy: 0.9364
Validation Loss: 0.1723, Validation Accuracy: 0.9525
Epoch [7/10], Loss: 0.1832, Accuracy: 0.9485
Validation Loss: 0.1925, Validation Accuracy: 0.9411
Epoch [8/10], Loss: 0.1768, Accuracy: 0.9475
Validation Loss: 0.2149, Validation Accuracy: 0.9394
Epoch [9/10], Loss: 0.1615, Accuracy: 0.9535
Validation Loss: 0.2236, Validation Accuracy: 0.9383
Epoch [10/10], Loss: 0.1681, Accuracy: 0.9507
Validation Loss: 0.1667, Validation Accuracy: 0.9480


In [42]:
transform_predict = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

predict_folder = 'splitted/test'

submission_path = 'submission.csv'
submission_df = pd.read_csv(submission_path)

image_extensions = ['.jpg', '.jpeg', '.png']

with torch.no_grad():
    for idx, row in submission_df.iterrows():
        image_id = row['id']
        img_path = None
        
        for ext in image_extensions:
            potential_path = os.path.join(predict_folder, f"{image_id}{ext}")
            if os.path.exists(potential_path):
                img_path = potential_path
                break
        
        if img_path:
            image = Image.open(img_path).convert('RGB')
            image = transform_predict(image).unsqueeze(0).to(device)

            outputs = model2(image)
            _, predicted = torch.max(outputs, 1)
            predicted_label = datasets.ImageFolder(root='splitted/train').classes[predicted.item()]

            submission_df.at[idx, 'label'] = predicted_label
        else:
            print(f"Image {image_id} not found in {predict_folder} with supported extensions")

label_dict = {
    "Ace": 0,
    "Akainu": 1,
    "Brook": 2,
    "Chopper": 3,
    "Crocodile": 4,
    "Franky": 5,
    "Jinbei": 6,
    "Kurohige": 7,
    "Law": 8,
    "Luffy": 9,
    "Mihawk": 10,
    "Nami": 11,
    "Rayleigh": 12,
    "Robin": 13,
    "Sanji": 14,
    "Shanks": 15,
    "Usopp": 16,
    "Zoro": 17
}

submission_df['label'] = submission_df['label'].map(label_dict)

submission_df.to_csv(submission_path, index=False)

  submission_df.at[idx, 'label'] = predicted_label
