In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision as tv

import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm

Датасет брался с kaggle 'dogs vs cats'

<h1>План</h1>
<li>Данные. Датасет + Трансформации </li>
<li>DataLoader</li>
<li>Arch</li>
<li>optimizer. Loss fn, metrics</li>
<li>train</li>
<li>use!</li>

In [3]:
class Dataset2class(torch.utils.data.Dataset):
    def __init__(self, path_dir1: str, path_dir2: str):
        super().__init__()
        
        self.path_dir1 = path_dir1
        self.path_dir2 = path_dir2
        
        
        self.dir1_list = sorted(os.listdir(path_dir1))
        self.dir2_list = sorted(os.listdir(path_dir2))
        

    def __len__(self):
        return len(self.dir1_list) + len(self.dir2_list)
        
    def __getitem__(self, idx):
        if idx <= len(self.dir1_list):
            class_id = 0
            img_path = os.path.join(self.path_dir1, self.dir1_list[idx])
        else:
            class_id = 1
            idx -= len(self.dir1_list)
            img_path = os.path.join(self.path_dir2, self.dir2_list[idx])
        
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32)
        img = img/255.0
        
        img = cv2.resize(img, (50, 50), interpolation=cv2.INTER_AREA)
        img.transpose((2, 0, 1))
        
        t_img = torch.from_numpy(img)
        t_class_id = torch.tensor(class_id)
        
        return {'img': img,'label': class_id}
        
        

In [4]:
train_dogs_path = './dataset/train/dogs/'
train_cats_path = './dataset/train/cats/'

test_dogs_path = './dataset/test/dogs/'
test_cats_path = './dataset/test/cats/'

train_ds_catsdogs = Dataset2class(train_dogs_path, train_cats_path)
test_ds_catsdogs = Dataset2class(test_dogs_path, test_cats_path)

In [5]:
len(train_ds_catsdogs)

20000

In [6]:
len(test_ds_catsdogs)

5000

In [7]:
batch_size = 16

train_loader = torch.utils.data.DataLoader(
    train_ds_catsdogs, shuffle=True,
    batch_size = batch_size, num_workers=1, drop_last = True
)

test_loader = torch.utils.data.DataLoader(
    test_ds_catsdogs, shuffle=True,
    batch_size = batch_size, num_workers=1, drop_last = False
)

In [8]:
 class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.act = nn.LeakyReLU(0.2)
        self.maxpool = nn.MaxPool2d(2, 2)
        self.conv0 = nn.Conv2d(3, 32, 3, stride=1, padding=0)
        self.conv1 = nn.Conv2d(3, 32, 3, stride=1, padding=0)
        self.conv2 = nn.Conv2d(3, 32, 3, stride=1, padding=0)
        self.conv3 = nn.Conv2d(3, 32, 3, stride=1, padding=0)
        self.conv4 = nn.Conv2d(3, 32, 3, stride=1, padding=0)
        
        self.adaptivepool = nn.AdaptiveAvgPool2d((1, 1))
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(64, 10)
        self.linear2 = nn.Linear(10, 2)
            
    def forward(self, x):
        
        out = self.conv0(x)
        out = self.act(out)
        out = self.maxpool(out)
        
        out = self.conv1(out)
        out = self.act(out)
        out = self.maxpool(out)

        out = self.conv2(out)
        out = self.act(out)
        out = self.maxpool(out)

        out = self.conv3(out)
        out = self.act(out)

        out = self.adaptivepool(out)
        out = self.flatten(out)
        out = self.linear1(out)
        out = self.act(out)
        out = self.linear2(out)

        
        return out

In [9]:
model = ConvNet()

In [10]:
model

ConvNet(
  (act): LeakyReLU(negative_slope=0.2)
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv4): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (adaptivepool): AdaptiveAvgPool2d(output_size=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear1): Linear(in_features=64, out_features=10, bias=True)
  (linear2): Linear(in_features=10, out_features=2, bias=True)
)

In [None]:
for sample in train_loader:
    img = sample['img']
    label = sample['label']
    print(img, label)
    break

In [17]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, betas=(0.9, 0.999))

In [18]:
def accuracy(pred, label):
    answer = F.softmax(pred.detach()).numpy().argmax(1) == label.numpy().argmax(1)
    return answer.mean()

In [None]:
epochs = 10

for epoch in range(epochs):
    loss_val = 0
    acc_val = 0
    for sample in (pbar := tqdm(train_loader)):
        img, label = sample['img'], sample['label']
        optimizer.zero_grad() # обнуление градиентов

        label = nn.functional.one_hot(label, 10).float() # преобразование из метки класса в
        # вектор вероятности. Было: 2, стало: [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
        pred = model(img)

        loss = loss_fn(pred, label)

        loss.backward()
        loss_item = loss.item()
        loss_val+= loss_item

        optimizer.step()

        acc_current = accuracy(pred, label)
        acc_val += acc_current

    pbar.set_description(f'loss: {loss_item: .5f}\taccuracy: {acc_current:.3f}')
    print(loss_val/len(dataloader))
    print(acc_val/len(dataloader))


  0%|                                                                                         | 0/1250 [00:00<?, ?it/s]

0
1
2
3
4
5
6
7
8
9
