In [69]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import pandas as pd
from sklearn.model_selection import train_test_split 

In [70]:
data_train, data_test = train_test_split(pd.read_csv('annotation.txt'), 
                        test_size=0.1, 
                        random_state=102)
device = torch.device('cuda')
target_size = (224, 224)

In [71]:
from sklearn.preprocessing import LabelEncoder

In [72]:
enc = LabelEncoder()
data_train['character'] = enc.fit_transform(data_train['character'])
data_test['character'] = enc.transform(data_test['character'])

In [73]:
data_train['character'].nunique()

18

In [74]:
transform = transforms.Compose([
    transforms.Resize(target_size),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5),(0.5, 0.5, 0.5))
])

In [75]:
indexes = data_train.index

In [76]:
import os

In [77]:
class CustomImage():
    def __init__(self, data, transform):
        self.transform = transform
        self.data = data
    
    def __len__(self):
        return len(self.data['character'])

    def __getitem__(self, idx):
        label_idx = self.data.iloc[idx, -1]
        img_path = self.data.iloc[idx, 0]
        img_path = img_path.replace("characters2", "characters")
        img_path = os.path.normpath(img_path)  # Нормализуем путь для ОС
        image_idx = Image.open(img_path)
        image_transformed = self.transform(image_idx)
        return image_transformed, label_idx

In [78]:
dataset = CustomImage(data_test, transform)

In [79]:
batch_size = 32

In [80]:
dataset_train = CustomImage(data_train, transform)
load_train = DataLoader(dataset_train, 
                        batch_size=batch_size, 
                        shuffle=False)
dataset_test = CustomImage(data_test, transform)
load_test = DataLoader(dataset_test, 
                       batch_size=4, 
                       shuffle=False)

In [81]:
for images, labels in load_train:
    print("Размер батча изображений:", images.shape)
    print("Метки:", labels)
    break

Размер батча изображений: torch.Size([32, 3, 224, 224])
Метки: tensor([14,  1,  0,  0, 10,  0, 10,  0, 17, 16, 11, 14,  9,  7,  3,  7,  4,  3,
        12,  0, 17, 12,  2,  3,  3,  2,  3, 10, 10,  1, 10, 11])


In [82]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1_1 = nn.Conv2d(3, 64, 3)
        self.conv1_2 = nn.Conv2d(64, 64, 3)
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv2_1 = nn.Conv2d(64, 128, 3)
        self.conv2_2 = nn.Conv2d(128, 128, 3)
        self.conv3_1 = nn.Conv2d(128, 256, 3)
        self.conv3_2 = nn.Conv2d(256, 256, 3)
        self.conv3_3 = nn.Conv2d(256, 256, 3)
        self.conv4_1 = nn.Conv2d(256, 512, 3)
        self.conv4_2 = nn.Conv2d(512, 512, 3)
        self.conv4_3 = nn.Conv2d(512, 512, 3)
        self.fc1 = nn.Linear(512*64, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 18)
        self.drop = nn.Dropout(0.5)
    
    def forward(self, answer):
        answer = F.relu(self.conv1_1(answer))
        answer = F.relu(self.conv1_2(answer))
        answer = self.maxpool(answer)
        answer = F.relu(self.conv2_1(answer))
        answer = F.relu(self.conv2_2(answer))
        answer = self.maxpool(answer)
        answer = F.relu(self.conv3_1(answer))
        answer = F.relu(self.conv3_2(answer))
        answer = F.relu(self.conv3_3(answer))
        answer = self.maxpool(answer)
        answer = F.relu(self.conv4_1(answer))
        answer = F.relu(self.conv4_2(answer))
        answer = F.relu(self.conv4_3(answer))
        answer = self.maxpool(answer)
        answer = answer.view(-1, 512 * 64)
        answer = self.drop(answer)
        answer = F.relu(self.fc1(answer))
        answer = F.relu(self.fc2(answer))
        answer = F.softmax(self.fc3(answer))
        return answer

In [83]:
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.001)

In [84]:
for epochs in range(10):
        for images, labels in load_train:
            images_cuda = images.to(device)
            labels_cuda = labels.to(device)
            optimizer.zero_grad()
            predict = model(images_cuda)
            loss = criterion(predict, labels_cuda)
            loss.backward()
            optimizer.step()
        print(epochs, loss)

  answer = F.softmax(self.fc3(answer))


0 tensor(2.8387, device='cuda:0', grad_fn=<NllLossBackward0>)
1 tensor(2.8387, device='cuda:0', grad_fn=<NllLossBackward0>)
2 tensor(2.8387, device='cuda:0', grad_fn=<NllLossBackward0>)
3 tensor(2.8387, device='cuda:0', grad_fn=<NllLossBackward0>)
4 tensor(2.8387, device='cuda:0', grad_fn=<NllLossBackward0>)
5 tensor(2.8387, device='cuda:0', grad_fn=<NllLossBackward0>)
6 tensor(2.8387, device='cuda:0', grad_fn=<NllLossBackward0>)
7 tensor(2.8387, device='cuda:0', grad_fn=<NllLossBackward0>)
8 tensor(2.8387, device='cuda:0', grad_fn=<NllLossBackward0>)
9 tensor(2.8387, device='cuda:0', grad_fn=<NllLossBackward0>)


In [85]:
correct = 0
total = 0

with torch.no_grad():  # Отключаем вычисление градиентов (экономия памяти)
    for images, labels in load_test:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)  # Индекс максимального значения
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy: {100 * correct / total:.2f}%")

  answer = F.softmax(self.fc3(answer))


Accuracy: 8.88%
