In [17]:
import torch

print("CUDA доступна:", torch.cuda.is_available())
print("Количество GPU:", torch.cuda.device_count())
print("Имя устройства:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "Нет CUDA")

CUDA доступна: True
Количество GPU: 1
Имя устройства: NVIDIA GeForce MX150


In [18]:
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import seaborn as sns

from PIL import Image

In [19]:
# для воспроизводимости результатов
torch.manual_seed(42)
np.random.seed(42)

In [20]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [21]:
batch_size = 64
num_classes = 2
learning_rate = 0.001
num_epochs = 5
classes = ["cat", "dog"]

In [22]:
import os
import shutil
from pathlib import Path
import random

In [15]:
# Исходные пути
source_dir = Path("./additional/PetImages")          # здесь лежат папки cat/ и dog/
output_dir = Path("./additional/dataset")            # сюда сохраним train/ и test/

In [16]:
def find_corrupted_images(data_dir, extensions=('.jpg', '.jpeg', '.png')):
    corrupted = []
    for img_path in Path(data_dir).rglob('*'):
        if img_path.suffix.lower() in extensions:
            try:
                img = Image.open(img_path)
                img.verify()  # Быстрая проверка целостности
            except (IOError, OSError, Image.UnidentifiedImageError) as e:
                print(f"Битый файл: {img_path} — {e}")
                corrupted.append(str(img_path))
    return corrupted

find_corrupted_images('./additional/PetImages/dog')
find_corrupted_images('./additional/PetImages/cat')

Битый файл: additional\PetImages\dog\11702.jpg — cannot identify image file 'additional\\PetImages\\dog\\11702.jpg'




Битый файл: additional\PetImages\cat\666.jpg — cannot identify image file 'additional\\PetImages\\cat\\666.jpg'


['additional\\PetImages\\cat\\666.jpg']

In [17]:
# Создаём целевые папки
for split in ["train", "test"]:
    for cls in ["cat", "dog"]:
        (output_dir / split / cls).mkdir(parents=True, exist_ok=True)

# Параметры
train_ratio = 0.8  # 80% на обучение, 20% на тест

# Обрабатываем каждый класс
for cls in ["cat", "dog"]:
    src_cls_dir = source_dir / cls
    all_files = list(src_cls_dir.glob("*.jpg"))  # или *.png, если нужно
    random.shuffle(all_files)

    n_train = int(len(all_files) * train_ratio)
    train_files = all_files[:n_train]
    test_files = all_files[n_train:]

    # Копируем файлы (можно использовать move, если не нужно сохранять оригинал)
    for f in train_files:
        shutil.copy(f, output_dir / "train" / cls / f.name)
    for f in test_files:
        shutil.copy(f, output_dir / "test" / cls / f.name)

In [23]:
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # нормализация пикселей по статистике ImageNet
])

train_dataset = datasets.ImageFolder('./additional/dataset/train', transform=transform)
test_dataset = datasets.ImageFolder('./additional/dataset/test', transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2) # DataLoader разбивает датасет на батчи
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

In [9]:
class LeNet5_224(nn.Module):
    def __init__(self, num_classes=2):
        super().__init__()
        
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 6, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(6),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # 16 * 53 * 53 = 44944
        self.fc = nn.Linear(44944, 120)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(120, 84)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(84, num_classes)

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        x = self.relu(x)
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        return x

In [24]:
model = LeNet5_224(num_classes).to(device) # переносим модель на видеокарту

cost = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [11]:
total_step = len(train_loader)

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):  
        images = images.to(device)
        labels = labels.to(device)
            
        #Forward pass
        outputs = model(images)
        loss = cost(outputs, labels)
        
        #Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))

Epoch [1/5], Step [1/313], Loss: 0.6881
Epoch [1/5], Step [2/313], Loss: 1.6171
Epoch [1/5], Step [3/313], Loss: 5.0114
Epoch [1/5], Step [4/313], Loss: 3.1199
Epoch [1/5], Step [5/313], Loss: 0.7274
Epoch [1/5], Step [6/313], Loss: 1.8315
Epoch [1/5], Step [7/313], Loss: 1.4653
Epoch [1/5], Step [8/313], Loss: 0.6702
Epoch [1/5], Step [9/313], Loss: 1.6036
Epoch [1/5], Step [10/313], Loss: 1.1508
Epoch [1/5], Step [11/313], Loss: 0.6617
Epoch [1/5], Step [12/313], Loss: 0.6769
Epoch [1/5], Step [13/313], Loss: 1.4308
Epoch [1/5], Step [14/313], Loss: 1.2298
Epoch [1/5], Step [15/313], Loss: 0.6941
Epoch [1/5], Step [16/313], Loss: 1.2459
Epoch [1/5], Step [17/313], Loss: 1.2720
Epoch [1/5], Step [18/313], Loss: 0.9904
Epoch [1/5], Step [19/313], Loss: 0.6357
Epoch [1/5], Step [20/313], Loss: 0.9056
Epoch [1/5], Step [21/313], Loss: 1.0813
Epoch [1/5], Step [22/313], Loss: 0.8129
Epoch [1/5], Step [23/313], Loss: 0.6717
Epoch [1/5], Step [24/313], Loss: 0.9567
Epoch [1/5], Step [25/313

Модель обучилась плохо. Делаю ставку на то, что lenet была заточена на работу с картинками 32*32 в ч/б. Я же пытаюсь скормить ей цветные картинки 224*224. Модель не подходит для выявления закономерностей на картинках такого размера, нужен серьёзный апгрейд архитектуры.

In [25]:
total_step = len(train_loader)

lenet5 = nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5),
    nn.ReLU(),
    
    nn.AvgPool2d(kernel_size=2, stride=2),
    
    nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
    nn.ReLU(),
    
    nn.AvgPool2d(kernel_size=2, stride=2),
    
    nn.Flatten(),
    
    nn.Linear(in_features=16 * 5 * 5, out_features=120),
    nn.ReLU(),
    
    nn.Linear(in_features=120, out_features=84),
    nn.ReLU(),
    
    nn.Linear(in_features=84, out_features=2) 
)
lenet5.to(device)

Sequential(
  (0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (1): ReLU()
  (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): ReLU()
  (5): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=400, out_features=120, bias=True)
  (8): ReLU()
  (9): Linear(in_features=120, out_features=84, bias=True)
  (10): ReLU()
  (11): Linear(in_features=84, out_features=2, bias=True)
)

In [None]:
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):  
        images = images.to(device)
        labels = labels.to(device)
            
        #Forward pass
        outputs = lenet5(images)
        loss = cost(outputs, labels)
        
        #Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, i+1, total_step, loss.item()))