<h3>Imports</h3>

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision as tv

import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm


<h3>Dataset</h3>

In [27]:
class Dataset2class(torch.utils.data.Dataset):
    """Two-class image dataset backed by two directories.

    Image files found directly inside ``path_dir1`` get label ``0`` and image
    files inside ``path_dir2`` get label ``1``. Samples are indexed with the
    (name-sorted) files of directory 1 first, followed by those of directory 2.

    Args:
        path_dir1: Directory holding the class-0 images.
        path_dir2: Directory holding the class-1 images.
        img_size: ``(width, height)`` passed to ``cv2.resize`` for every image.
            Defaults to ``(64, 64)``.

    Raises:
        RuntimeError: If either directory contains no file with one of the
            allowed image extensions.
    """

    def __init__(self, path_dir1:str, path_dir2:str, img_size=(64, 64)):
        super().__init__()

        self.path_dir1 = path_dir1
        self.path_dir2 = path_dir2
        self.img_size = (int(img_size[0]), int(img_size[1]))

        # Keep only common image files and ensure they're actual files
        allowed_ext = ('.jpg', '.jpeg', '.png', '.bmp')
        self.dir1_list = self._list_images(path_dir1, allowed_ext)
        self.dir2_list = self._list_images(path_dir2, allowed_ext)

        if len(self.dir1_list) == 0 or len(self.dir2_list) == 0:
            raise RuntimeError(f"No images found in {path_dir1} or {path_dir2} (checked extensions {allowed_ext})")

    @staticmethod
    def _list_images(path_dir, allowed_ext):
        """Return the sorted names of regular files in ``path_dir`` whose
        lower-cased name ends with one of ``allowed_ext``."""
        return sorted(
            name for name in os.listdir(path_dir)
            if name.lower().endswith(allowed_ext)
            and os.path.isfile(os.path.join(path_dir, name))
        )

    def _locate(self, idx):
        """Map a sample index to ``(image_path, class_id)``.

        Negative indices count from the end of the whole dataset, like list
        indexing. Without this normalisation a negative index would always be
        served from directory 1 and labelled 0.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        total = len(self.dir1_list) + len(self.dir2_list)
        pos = idx + total if idx < 0 else idx
        if pos < 0 or pos >= total:
            raise IndexError(f"index {idx} is out of range for dataset of size {total}")

        n_first = len(self.dir1_list)
        if pos < n_first:
            return os.path.join(self.path_dir1, self.dir1_list[pos]), 0
        return os.path.join(self.path_dir2, self.dir2_list[pos - n_first]), 1

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            dict with ``'img'``: float32 tensor in channel-first layout with
            values scaled by 1/255, and ``'label'``: scalar tensor holding the
            class id (0 or 1).

        Raises:
            IndexError: If ``idx`` is out of range.
            FileNotFoundError: If OpenCV cannot decode the image file.
        """
        img_path, class_id = self._locate(idx)

        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if img is None:
            # Fail fast with informative message instead of silent errors/hangs
            raise FileNotFoundError(f"Failed to read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32)
        img = img/255.0

        # cv2.resize takes the target size as (width, height).
        img = cv2.resize(img, self.img_size, interpolation=cv2.INTER_AREA)

        # HWC (OpenCV layout) -> CHW (layout expected by nn.Conv2d).
        img = img.transpose((2, 0, 1))

        t_img = torch.from_numpy(img)
        t_class_id = torch.tensor(class_id)

        return {'img': t_img, 'label': t_class_id}

    def __len__(self):
        """Total number of images across both directories."""
        return len(self.dir1_list) + len(self.dir2_list)

In [28]:
train_cats_dir = './PetImages/Cat'
train_dogs_dir = './PetImages/Dog'

test_cats_dir = './PetImages/Cat'
test_dogs_dir = './PetImages/Dog'

# Share of the samples held out for evaluation, and the seed that keeps the
# split identical across kernel restarts.
TEST_FRACTION = 0.2
SPLIT_SEED = 42

if (test_cats_dir, test_dogs_dir) == (train_cats_dir, train_dogs_dir):
    # Train and test point at the very same folders. Building two datasets from
    # them would evaluate the model on the images it was trained on, so carve
    # two disjoint subsets out of a single dataset instead.
    full_ds_catsdogs = Dataset2class(train_cats_dir, train_dogs_dir)
    # Dataset2class guarantees at least one image per class, so holding out at
    # least one sample still leaves a non-empty training subset.
    n_test = max(1, int(len(full_ds_catsdogs) * TEST_FRACTION))
    train_ds_catsdogs, test_ds_catsdogs = torch.utils.data.random_split(
        full_ds_catsdogs,
        [len(full_ds_catsdogs) - n_test, n_test],
        generator=torch.Generator().manual_seed(SPLIT_SEED),
    )
else:
    # Separate folders were configured: use them as they are.
    train_ds_catsdogs = Dataset2class(train_cats_dir, train_dogs_dir)
    test_ds_catsdogs = Dataset2class(test_cats_dir, test_dogs_dir)



<h3>Data Loader</h3>

In [29]:
batch_size = 16

# Training batches: reshuffled on every pass; a trailing incomplete batch is
# dropped so every training step sees exactly `batch_size` samples.
train_loader = torch.utils.data.DataLoader(
    dataset=train_ds_catsdogs,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
    num_workers=0,
)

# Evaluation batches: the trailing incomplete batch is kept so that every
# sample of the test dataset is visited.
test_loader = torch.utils.data.DataLoader(
    dataset=test_ds_catsdogs,
    batch_size=batch_size,
    shuffle=True,
    drop_last=False,
    num_workers=0,
)

<h3>Architecture</h3>

In [43]:
# Convolutional neural network
class ConvNet(nn.Module):
    """Small CNN producing two output scores per input image.

    Five unpadded 3x3 convolutions with 32 output channels each, every one
    followed by LeakyReLU(0.2). The first three are additionally followed by
    2x2 max pooling. Global average pooling then reduces each feature map to a
    single value, and a 32 -> 10 -> 2 fully connected head yields the output.
    """

    def __init__(self):
        super().__init__()

        width = 32  # channels produced by every convolution
        conv_cfg = dict(kernel_size=3, stride=1, padding=0)

        # Parameter-free layers, reused at several points in forward().
        self.act = nn.LeakyReLU(0.2)
        self.maxpool = nn.MaxPool2d(2, 2)

        # conv0 consumes the 3 image channels; the rest keep the width.
        self.conv0 = nn.Conv2d(3, width, **conv_cfg)
        self.conv1 = nn.Conv2d(width, width, **conv_cfg)
        self.conv2 = nn.Conv2d(width, width, **conv_cfg)
        self.conv3 = nn.Conv2d(width, width, **conv_cfg)
        self.conv4 = nn.Conv2d(width, width, **conv_cfg)

        self.adaptivepool = nn.AdaptiveAvgPool2d((1, 1))
        self.flatten = nn.Flatten()
        # After global pooling the flattened vector has one entry per channel.
        self.linear1 = nn.Linear(width, 10)
        self.linear2 = nn.Linear(10, 2)

    def forward(self, x):
        """Run the network on a batch of 3-channel images.

        Args:
            x: Image batch with 3 channels, e.g. shape ``(16, 3, 64, 64)`` as
                produced by the loaders in this notebook.

        Returns:
            Tensor with 2 raw (un-normalised) scores per image.
        """
        feats = x

        # Stage 1: convolution -> activation -> 2x2 down-sampling, three times.
        for conv in (self.conv0, self.conv1, self.conv2):
            feats = self.maxpool(self.act(conv(feats)))

        # Stage 2: two more convolutions without pooling.
        for conv in (self.conv3, self.conv4):
            feats = self.act(conv(feats))

        # Head: global average pool, flatten, then the two linear layers.
        pooled = self.flatten(self.adaptivepool(feats))
        hidden = self.act(self.linear1(pooled))
        return self.linear2(hidden)

In [44]:
# Build the network defined above with freshly initialised weights.
model = ConvNet()

In [45]:
# Display the module tree to check which layers are registered.
model

ConvNet(
  (act): LeakyReLU(negative_slope=0.2)
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (adaptivepool): AdaptiveAvgPool2d(output_size=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear1): Linear(in_features=32, out_features=10, bias=True)
  (linear2): Linear(in_features=10, out_features=2, bias=True)
)

In [46]:
# Smoke test: push a single training batch through the untrained network.
# next(iter(...)) fetches exactly one batch and fails loudly on an empty loader
# instead of silently leaving `img` / `label` undefined.
sample = next(iter(train_loader))
img = sample['img']
label = sample['label']

# Only the shapes matter here, so skip building an autograd graph.
with torch.no_grad():
    logits = model(img)

# One row of two class scores per image in the batch.
assert logits.shape == (img.shape[0], 2), logits.shape

In [47]:
# Shape of the image batch taken from train_loader in the previous cell.
img.shape

torch.Size([16, 3, 64, 64])