In [1]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import pandas as pd
from sklearn.model_selection import train_test_split
import optuna
from torchvision import models

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data_train, data_test = train_test_split(pd.read_csv('annotation.txt'), 
                        test_size=0.1, 
                        random_state=102)
device = torch.device('cuda')
target_size = (224, 224)

In [3]:
from sklearn.preprocessing import LabelEncoder

In [4]:
enc = LabelEncoder()
data_train['character'] = enc.fit_transform(data_train['character'])
data_test['character'] = enc.transform(data_test['character'])

In [5]:
data_train['character'].nunique()

18

In [6]:
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

In [7]:
indexes = data_train.index

In [8]:
import os

In [9]:
class CustomImage():
    def __init__(self, data, transform):
        self.transform = transform
        self.data = data
    
    def __len__(self):
        return len(self.data['character'])

    def __getitem__(self, idx):
        label_idx = self.data.iloc[idx, -1]
        img_path = self.data.iloc[idx, 0]
        img_path = img_path.replace("characters2", "characters")
        img_path = os.path.normpath(img_path)  # Нормализуем путь для ОС
        image_idx = Image.open(img_path)
        image_transformed = self.transform(image_idx)
        return image_transformed, label_idx

In [10]:
dataset = CustomImage(data_test, transform)

In [11]:
batch_size = 32

In [12]:
dataset_train = CustomImage(data_train, transform)
load_train = DataLoader(dataset_train, 
                        batch_size=batch_size, 
                        shuffle=False)
dataset_test = CustomImage(data_test, transform)
load_test = DataLoader(dataset_test, 
                       batch_size=4, 
                       shuffle=False)

In [13]:
for images, labels in load_train:
    print("Размер батча изображений:", images.shape)
    print("Метки:", labels)
    break

Размер батча изображений: torch.Size([32, 3, 224, 224])
Метки: tensor([14,  1,  0,  0, 10,  0, 10,  0, 17, 16, 11, 14,  9,  7,  3,  7,  4,  3,
        12,  0, 17, 12,  2,  3,  3,  2,  3, 10, 10,  1, 10, 11])


In [14]:
class CNN(nn.Module):
    def __init__(self, dropout):
        super().__init__()
        self.conv1_1 = nn.Conv2d(3, 64, 3)
        self.conv1_2 = nn.Conv2d(64, 64, 3)
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv2_1 = nn.Conv2d(64, 128, 3)
        self.conv2_2 = nn.Conv2d(128, 128, 3)
        self.conv3_1 = nn.Conv2d(128, 256, 3)
        self.conv3_2 = nn.Conv2d(256, 256, 3)
        self.conv3_3 = nn.Conv2d(256, 256, 3)
        self.conv4_1 = nn.Conv2d(256, 512, 3)
        self.conv4_2 = nn.Conv2d(512, 512, 3)
        self.conv4_3 = nn.Conv2d(512, 512, 3)
        self.fc1 = nn.Linear(512*64, 2048)
        self.fc2 = nn.Linear(2048, 1024)
        self.fc3 = nn.Linear(1024, 18)
        self.drop = nn.Dropout(dropout)
    
    def forward(self, answer):
        answer = F.leaky_relu(self.conv1_1(answer), 0.005)
        answer = F.leaky_relu(self.conv1_2(answer), 0.005)
        answer = self.maxpool(answer)
        answer = F.leaky_relu(self.conv2_1(answer), 0.005)
        answer = F.leaky_relu(self.conv2_2(answer), 0.005)
        answer = self.maxpool(answer)
        answer = F.leaky_relu(self.conv3_1(answer), 0.005)
        answer = F.leaky_relu(self.conv3_2(answer), 0.005)
        answer = F.leaky_relu(self.conv3_3(answer), 0.005)
        answer = self.maxpool(answer)
        answer = self.drop(answer)
        answer = F.leaky_relu(self.conv4_1(answer), 0.005)
        answer = F.leaky_relu(self.conv4_2(answer), 0.005)
        answer = F.leaky_relu(self.conv4_3(answer), 0.005)
        answer = self.maxpool(answer)
        answer = answer.view(-1, 512 * 64)
        answer = self.drop(answer)
        answer = F.leaky_relu(self.fc1(answer), 0.005)
        answer = F.leaky_relu(self.fc2(answer), 0.005)
        answer = self.fc3(answer)
        return answer

In [15]:
def init_weights(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
    elif isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, 0, 0.01)  # Оригинальный метод VGG
        nn.init.zeros_(m.bias)

In [145]:
'''def obj(trial):
    lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
    wd = trial.suggest_float('wd', 1e-5, 1e-1, log=True)
    drop = trial.suggest_float('drop', 1e-5, 0.5)
    model = CNN(dropout=drop).to(device)
    model.apply(init_weights)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    for epochs in range(10):
        epoch_loss = 0.0 
        for images, labels in load_train:
            images_cuda = images.to(device)
            labels_cuda = labels.to(device)
            optimizer.zero_grad()
            predict = model(images_cuda)
            loss = criterion(predict, labels_cuda)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        trial.report(epoch_loss, epochs)
        if trial.should_prune():
            raise optuna.TrialPruned()
    return epoch_loss / len(load_train)'''

"def obj(trial):\n    lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)\n    wd = trial.suggest_float('wd', 1e-5, 1e-1, log=True)\n    drop = trial.suggest_float('drop', 1e-5, 0.5)\n    model = CNN(dropout=drop).to(device)\n    model.apply(init_weights)\n    criterion = nn.CrossEntropyLoss()\n    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)\n    for epochs in range(10):\n        epoch_loss = 0.0 \n        for images, labels in load_train:\n            images_cuda = images.to(device)\n            labels_cuda = labels.to(device)\n            optimizer.zero_grad()\n            predict = model(images_cuda)\n            loss = criterion(predict, labels_cuda)\n            loss.backward()\n            optimizer.step()\n            epoch_loss += loss.item()\n        trial.report(epoch_loss, epochs)\n        if trial.should_prune():\n            raise optuna.TrialPruned()\n    return epoch_loss / len(load_train)"

In [146]:
'''learn = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(),pruner=optuna.pruners.MedianPruner(n_startup_trials=3))
learn.optimize(obj, n_trials=20)'''

"learn = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(),pruner=optuna.pruners.MedianPruner(n_startup_trials=3))\nlearn.optimize(obj, n_trials=20)"

In [147]:
'''print("Лучшие параметры:", learn.best_params)
print("Лучший loss:", learn.best_value)'''

'print("Лучшие параметры:", learn.best_params)\nprint("Лучший loss:", learn.best_value)'

In [148]:
'''# График истории оптимизации
optuna.visualization.plot_optimization_history(learn)

# Важность гиперпараметров
optuna.visualization.plot_param_importances(learn)

# Зависимость loss от lr
optuna.visualization.plot_slice(learn, params=["lr"])'''

'# График истории оптимизации\noptuna.visualization.plot_optimization_history(learn)\n\n# Важность гиперпараметров\noptuna.visualization.plot_param_importances(learn)\n\n# Зависимость loss от lr\noptuna.visualization.plot_slice(learn, params=["lr"])'

In [None]:
model = CNN(dropout=0.06848746024680974).to(device)
model.apply(init_weights)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=9.920521505714478e-05, weight_decay=0.006146853390949883)
for epochs in range(200):
    epoch_loss = 0.0 
    for images, labels in load_train:
        images_cuda = images.to(device)
        labels_cuda = labels.to(device)
        optimizer.zero_grad()
        predict = model(images_cuda)
        loss = criterion(predict, labels_cuda)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(epoch_loss / len(load_train), epochs) #нечаянно запустил обучения сначала, в первый раз модель обучалась 4 часа

KeyboardInterrupt: 

In [None]:
correct = 0
total = 0

with torch.no_grad():  # Отключаем вычисление градиентов (экономия памяти)
    for images, labels in load_train:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)  # Индекс максимального значения
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy: {100 * correct / total:.2f}%")
correct = 0
total = 0

with torch.no_grad():  # Отключаем вычисление градиентов (экономия памяти)
    for images, labels in load_test:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)  # Индекс максимального значения
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy: {100 * correct / total:.2f}%") #актуальная точность для последнего обучения.

Accuracy: 91.75%
Accuracy: 85.06%


In [16]:
model = models.vgg16(pretrained=True)



In [17]:
for param in model.parameters():
    param.requires_grad = False
model.classifier[6] = nn.Linear(4096, 18)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.classifier[6].parameters(), lr=0.0001)

In [18]:
for epochs in range(200):
    epoch_loss = 0.0 
    for images, labels in load_train:
        images_cuda = images.to(device)
        labels_cuda = labels.to(device)
        optimizer.zero_grad()
        predict = model(images_cuda)
        loss = criterion(predict, labels_cuda)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(epoch_loss / len(load_train), epochs)

2.659635594016627 0
2.371012123007523 1
2.233970253718527 2
2.114411577425505 3
2.0739042771489995 4
2.0021345684402867 5
1.965537829148142 6
1.9278491070396022 7
1.9011418869620875 8
1.8911702984257748 9
1.868479081204063 10
1.8532165351666903 11
1.837268072053006 12
1.8288496902114466 13
1.8157867990042034 14
1.8081754508771395 15
1.8032771248566477 16
1.7872418416173834 17
1.791586608635752 18
1.7683535569592526 19
1.7701343699505454 20
1.7647978450122632 21
1.752945015304967 22
1.737764483376553 23
1.7358796828671506 24
1.748011451018484 25
1.7384859455259223 26
1.7236569530085513 27
1.7032738233867444 28
1.7112474454076667 29
1.715658740620864 30
1.7168565367397508 31
1.7034600270421882 32
1.7103132116167168 33
1.6911966311304192 34
1.6982233455306606 35
1.6719858495812667 36
1.6680999228828832 37
1.6762843979032416 38
1.6959572654021413 39
1.6881762805737948 40
1.6774518408273396 41
1.678440807367626 42
1.6741492484745226 43
1.674139668439564 44
1.6686302448573866 45
1.6593846954

In [20]:
correct = 0
total = 0

with torch.no_grad():  # Отключаем вычисление градиентов (экономия памяти)
    for images, labels in load_train:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)  # Индекс максимального значения
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy: {100 * correct / total:.2f}%")
correct = 0
total = 0

with torch.no_grad():  # Отключаем вычисление градиентов (экономия памяти)
    for images, labels in load_test:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)  # Индекс максимального значения
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy: {100 * correct / total:.2f}%") #актуальная точность для последнего обучения.

Accuracy: 52.49%
Accuracy: 46.75%
