## Установка и импорт модулей 

In [None]:
!pip install wandb
import wandb

In [None]:
import albumentations as A
import albumentations.pytorch as Ap
import argparse
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import PIL
import requests
import sklearn
import sys
import time
import torch
import torch.nn as nn
import torchvision

# from google.colab import drive
from IPython.display import clear_output
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms as tf
from torchvision.datasets import DatasetFolder 
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Mount Google Drive (Colab only); a later cell copies kaggle.json from it.
if not os.path.exists('./drive'):
    # The module-level `from google.colab import drive` is commented out, so
    # the name is imported here; google.colab exists only inside Colab.
    try:
        from google.colab import drive
    except ImportError:
        print('google.colab is not available: Google Drive was not mounted')
    else:
        drive.mount('./drive')

In [None]:
if not os.path.exists('/root/.kaggle/kaggle.json'):
    !mkdir /root/.kaggle
    !cp ./drive/MyDrive/kaggle.json /root/.kaggle/kaggle.json
    !kaggle datasets download -d andreybeyn/qudata-gembed-landmarks-210
    !unzip -q qudata-gembed-landmarks-210.zip

    !mkdir ./models ./models/models ./models/desc
    

Downloading qudata-gembed-landmarks-210.zip to /content
 99% 736M/741M [00:10<00:00, 86.8MB/s]
100% 741M/741M [00:10<00:00, 75.3MB/s]


In [None]:
# Authenticate this session with Weights & Biases.
wandb.login()

In [None]:
# Start a W&B run in project 'ml_landmarks' under the entity 'ml_landmarks'.
run = wandb.init(
    project = 'ml_landmarks',
    entity = 'ml_landmarks',
)

# Пайплайн примерно такой:


*   Обрабатываем данные:

    * Считываем данные, перегоняем в тензоры

    * Мб делаем нормализацию (пока нет)

    * Разбиваем на тренировочную/валидационную

*   Пишем сетки: пробуем менять архитектуру, если совсем голяк - меняем предобработку.

*   Попробовать сделать ансамбли: бэггинг!, бустинг.

*   Оценивать будем `F1`, скорее всего.

*   Если все совсем совсем плохо:

    * Пробовать более сильные ансамбли. Если тут голяк - пробовать еще:)
    
    * Будем пробовать аугментацию, потому что картинок реально мало
    
    * Можно будет попробовать найти похожую сетку (похожую, исходя из поставленной задачи), и попробовать её зафайнтьюнить.
    
    * Брать другой датасет. Есть сразу проблемы: они в большинстве своем неразмечены (те, которые я находил).


## Чтение данных

Тут я попробовал поиграть с вариантами хранения данных. Где-то считывал данные и хранил уже обработанные тензоры, где-то считывал данные и обрабатывал их только при необходимости. Также использовал класс, встроенный в `torchvision`. В конце привёл сравнение работы всех классов.

В датасете есть черно-белые фотографии, и фотографии, в которых 4 канала, а не 3 по стандарту. Таких картинок немного, поэтому удалим их.

In [None]:
init_path = './landmarks/'

# Get all filepaths.
# Files are collected only from directories that have no sub-directories;
# the names of all sub-directories met on the way are used as class labels.
all_files = set()
labels = []
for path, dirs, files in os.walk(init_path):
    if dirs:
        labels.extend(dirs)
    else:
        all_files.update('/'.join([path, file_]) for file_ in files)

# Filtering
# A real tuple: with the bare string 'RGB' the `in` test below would be a
# substring check instead of a membership check.
supported_types = ('RGB',)

incorrect_files = set()
for filename in all_files:
    try:
        # The context manager closes the file handle right after the check.
        with PIL.Image.open(filename) as img:
            if img.mode not in supported_types:
                incorrect_files.add(filename)
    except OSError:
        # Not an image / unreadable file: exclude it as well.
        incorrect_files.add(filename)

# Sorted so that the file order does not depend on set iteration order.
all_files = sorted(all_files - incorrect_files)
print('\n'.join([f'All files: {len(all_files)}',
                 f'Deleted files: {len(incorrect_files)}']))

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

All files: 10515
Deleted files: 19


device(type='cpu')

### FastDataset (обработка заранее, потом чтение из памяти)
Тут я сделал класс датасета с быстрым доступом.

Мы сразу по пути к файлу загружаем картинку, приводим её к нужному размеру, делаем из неё тензор и запоминаем его; потом получаем к нему доступ просто по индексу, не делая никакой предобработки.

+: Быстро бегаем

-: Долгая инициализация

-: В теории, может сожрать всю память

In [None]:
class FastDataset(Dataset):
    '''
    In-memory image dataset.

    Every file is opened, passed through `transform` and divided by 255 once,
    in __init__; __getitem__ afterwards is a plain list lookup.
    '''
    def __init__(self, mode = 'train', files = all_files, labels = labels,
                 transform = None,
                 image_shape = (200, 200)):
        
        '''
        mode - train/valid/test; labels are returned only for train/valid
        files - iterable with filepaths; the name of the parent directory of
                a file is used as its label
        labels - list with all possible namelabels
        transform - proccessing of file (callable applied to the opened
                    PIL image); defaults to Resize(image_shape) + PILToTensor
        image_shape - shape of result tensor
        '''
        
        self.mode = mode
        self.image_shape = image_shape
        self.transform = transform if transform \
        else tf.Compose([tf.Resize(image_shape), tf.PILToTensor()])
        # Paths are kept because train_valid_split re-partitions them.
        self.files = list(files)
        self.x = []
        self.y = []
        self.device = device

        self.check_mode = self.mode in ('train', 'valid')

        self.le = LabelEncoder()
        self.le.fit(labels)
        # Saving tensors from PIL.Image
        for path in self.files:
            self.x.append(self.get_sample(path) / 255)
            self.y.append(path.split('/')[-2])

        self._len = len(self.x)

    def get_sample(self, filepath):
        '''
        Open the image once, apply self.transform and return the result.
        The file handle is closed when the `with` block exits.
        '''
        with PIL.Image.open(filepath) as image:
            return self.transform(image)
                    
    def __len__(self):
        return self._len

    def __getitem__(self, idx):
        '''
        Returns (tensor, encoded label) in train/valid mode,
        only the tensor otherwise.
        '''
        if self.check_mode:
            y = self.le.transform([self.y[idx]])
            return self.x[idx], y[0]
        else:
            return self.x[idx]

    def decode(self, num_label):
        '''
        Convert an encoded label back to its name.
        '''
        return self.le.inverse_transform([num_label])[0]

    def train_valid_split(self, train_size = 0.9):
        '''
        Uniform split of files: every class is split separately with
        sklearn's train_test_split, so both parts keep the class balance.
        Classes with fewer than two files go entirely to the train part.

        Returns two datasets: train_dataset and valid_dataset
        (both use the transform and image_shape of this dataset).
        '''
        # Group the paths by class in one pass instead of rescanning all
        # paths for every label.
        files_by_label = {}
        for filename in self.files:
            label = filename.split('/')[-2]
            files_by_label.setdefault(label, set()).add(filename)

        train_list = []
        valid_list = []
        labels = self.le.classes_

        for label in labels:
            # Sorted: the split then depends only on the random state.
            class_files = sorted(files_by_label.get(str(label), ()))
            if len(class_files) < 2:
                # train_test_split cannot hold anything out here.
                train_list.extend(class_files)
                continue
            cur_train_list, cur_valid_list = train_test_split(
                class_files, train_size = train_size)
            train_list.extend(cur_train_list)
            valid_list.extend(cur_valid_list)

        train_ds = FastDataset(mode = 'train',
                               labels = labels,
                               transform = self.transform,
                               image_shape = self.image_shape,
                               files = train_list)

        valid_ds = FastDataset(mode = 'valid',
                               labels = labels,
                               transform = self.transform,
                               image_shape = self.image_shape,
                               files = valid_list)
        return train_ds, valid_ds

### CustomDataset (чтение и обработка на лету)

Это тоже класс датасета, но с медленным доступом. Здесь мы запоминаем все пути до картинок, потом при получении по индексу делаем предобработку, типа ресайз и перегоняем в тензор.

+: Жрет немного памяти (ну во всяком случае меньше, чем `FastDataset`)

+: Быстрая инициализация

-: Долго бегает

In [None]:
class CustomDataset(Dataset):
    '''
    Lazy image dataset: only the file paths are stored, every image is
    opened and transformed inside __getitem__.

    Unlike FastDataset, tensors are returned exactly as the transform
    produced them (there is no division by 255 here).
    '''
    def __init__(self, mode = 'train', files = all_files, labels = labels,
                 transform = None,
                 image_shape = (200, 200)):
        
        '''
        mode - train/valid/test; labels are returned only for train/valid
        files - list/set with filepaths
        labels - list with all possible namelabels
        transform - proccessing of file; when it is not given,
                    default_transform is used
        image_shape - shape of result tensor
        '''

        self.mode = mode
        self.transform = transform
        self.image_shape = image_shape
        # Copied into a list so that a set of paths can be indexed too.
        self.files = list(files)
        
        # Labels are available for train and valid, same as in the other
        # dataset classes of this file.
        self.check_mode = self.mode in ('train', 'valid')
        
        self.le = LabelEncoder()
        self.le.fit(labels)

    def __len__(self):
        return len(self.files)
    
    def default_transform(self, img):
        '''
        Make image resizing, and converting to tensor
        '''
        transform = tf.Compose([
            tf.Resize(self.image_shape),
            tf.PILToTensor()
        ])
        return transform(img)

    def __getitem__(self, idx):
        '''
        Returns (tensor, encoded label) in train/valid mode,
        only the tensor otherwise.
        '''
        path = self.files[idx]
        with PIL.Image.open(path) as img:
            if self.transform:
                tensor = self.transform(img)
            else:
                tensor = self.default_transform(img)

        if self.check_mode:
            label = self.get_label(idx)
            return tensor, self.le.transform([label])[0]
        else:
            return tensor

    def get_label(self, idx):
        '''
        Name of the class of the idx-th file (its parent directory name).
        '''
        assert self.check_mode, \
        'It is not possible to get label'
        path = self.files[idx]
        # [-2] instead of a fixed position: works for any depth of the
        # dataset root.
        return path.split('/')[-2]

    def decode(self, num_label):
        '''
        Convert an encoded label back to its name.
        '''
        return self.le.inverse_transform([num_label])[0]

    def train_valid_split(self, train_size = 0.9):
        '''
        Uniform split of files: every class is split separately with
        sklearn's train_test_split, so both parts keep the class balance.
        Classes with fewer than two files go entirely to the train part.

        Returns two datasets: train_dataset and valid_dataset
        (both use the transform and image_shape of this dataset).
        '''
        # Group the paths by class in one pass instead of rescanning all
        # paths for every label.
        files_by_label = {}
        for filename in self.files:
            label = filename.split('/')[-2]
            files_by_label.setdefault(label, set()).add(filename)

        train_list = []
        valid_list = []
        labels = self.le.classes_
        
        for label in labels:
            # Sorted: the split then depends only on the random state.
            class_files = sorted(files_by_label.get(str(label), ()))
            if len(class_files) < 2:
                # train_test_split cannot hold anything out here.
                train_list.extend(class_files)
                continue
            cur_train_list, cur_valid_list = train_test_split(
                class_files, train_size = train_size)
            train_list.extend(cur_train_list)
            valid_list.extend(cur_valid_list)

        train_ds = CustomDataset(mode = 'train',
                                 labels = labels,
                                 transform = self.transform,
                                 image_shape = self.image_shape,
                                 files = train_list)
        
        valid_ds = CustomDataset(mode = 'valid',
                                 labels = labels,
                                 transform = self.transform,
                                 image_shape = self.image_shape,
                                 files = valid_list)
        return train_ds, valid_ds

### `torchvision.datasets.DatasetFolder`
Тоже прогоним через сравнение просто для проверки

In [None]:
def make_DatasetFolder(path = init_path, transform = None,
                       extensions = [], image_shape = (200, 200), mode = None,
                       **kwargs):
    '''
    Build a torchvision DatasetFolder over `path`.

    path - root directory of the dataset
    transform - proccessing of the loaded image; defaults to
                Resize(image_shape) + PILToTensor
    extensions - allowed file extensions; when empty, jpg/jpeg/png/webp
                 are used (the default list is never mutated)
    image_shape - shape used by the default transform
    mode, kwargs - accepted and ignored, so that the function can be called
                   with the same keywords as the dataset classes
    '''
    def loader(path):
        # copy() forces the pixels to be read, so the returned image stays
        # valid after the file handle is closed by the context manager.
        with PIL.Image.open(path) as image:
            return image.copy()
        
    if not transform:
        transform = tf.Compose([
            tf.Resize(image_shape),
            tf.PILToTensor()
        ])

    # Respect the caller's extensions instead of always overwriting them.
    if extensions:
        extensions = tuple(extensions)
    else:
        extensions = ('jpg', 'jpeg', 'png', 'webp')
    return DatasetFolder(path, loader = loader,
                         extensions = extensions, transform = transform)

### `AugmentedFastDataset`

Версия `FastDataset`, дополненная аугментациями

In [None]:
class AugmentedFastDataset(Dataset):
    '''
    In-memory image dataset with augmentations.

    For every file the transformed image and one copy per augmentation are
    computed in __init__, divided by 255 and stored; __getitem__ is a plain
    list lookup.
    '''
    def __init__(self, mode = 'train', files = all_files, labels = labels,
                 transform = None,
                 image_shape = (200, 200), augmentations = None):
        '''
        mode - train/valid/test; labels are returned only for train/valid
        files - iterable with filepaths; the name of the parent directory of
                a file is used as its label
        labels - list with all possible namelabels
        transform - proccessing of file; defaults to
                    Resize(image_shape) + PILToTensor
        image_shape - shape of result tensor
        augmentations - sequence of callables applied to the transformed
                        tensor; None entries are skipped (the non-augmented
                        sample is always stored). Defaults to the built-in
                        set below.
        '''
        
        self.mode = mode
        self.image_shape = image_shape
        self.transform = transform if transform \
        else tf.Compose([tf.Resize(image_shape), tf.PILToTensor()])
        
        self.x = []
        self.y = []

        self.check_mode = self.mode in ('train', 'valid')
        # Paths are kept because train_valid_split re-partitions them.
        self.files = list(files)
        self._len = len(self.files)

        self.le = LabelEncoder()
        self.le.fit(labels)

        if augmentations:
            self.augmentations = augmentations
        else:
            self.augmentations = (
                None, 
                tf.ColorJitter(brightness = 0.3,
                               contrast = 0.3,
                               saturation = 0.3),
                tf.RandomPosterize(bits = 2, p = 1),
                tf.RandomAdjustSharpness(sharpness_factor = 2,
                                         p = 1),
                tf.RandomEqualize(p = 1),
                tf.RandomRotation(degrees = (-20, 20)),
                tf.RandomHorizontalFlip(p = 1)
            )

        # Samples stored per file: the original plus every non-None
        # augmentation (7 for the default set).
        self.augmentations_amount = 1 + sum(
            1 for augmentation in self.augmentations
            if augmentation is not None)

        # Saving tensors from PIL.Image
        for path in self.files:
            label = path.split('/')[-2]
            tensor = self.get_sample(path)
            augmented = self.get_augmented_samples(tensor)
            self.x.extend(augmented)
            self.y.extend([label] * len(augmented))

    def get_sample(self, filepath):
        '''
        Open the image once, apply self.transform and return the result.
        The file handle is closed when the `with` block exits.
        '''
        with PIL.Image.open(filepath) as image:
            return self.transform(image)
                    
    def get_augmented_samples(self, tensor):
        '''
        Return [tensor / 255] followed by every augmentation of `tensor`,
        each divided by 255 as well.
        '''
        answer = [tensor / 255]
        answer.extend(
            [augmentation(tensor) / 255 
            for augmentation in self.augmentations
            if augmentation is not None]
        )
        return answer

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        '''
        Returns (tensor, encoded label) in train/valid mode,
        only the tensor otherwise.
        '''
        if self.check_mode:
            y = self.le.transform([self.y[idx]])
            return self.x[idx], y[0]
        else:
            return self.x[idx]

    def decode(self, num_label):
        '''
        Convert an encoded label back to its name.
        '''
        return self.le.inverse_transform([num_label])[0]

    def train_valid_split(self, train_size = 0.9):
        '''
        Uniform split of files: every class is split separately with
        sklearn's train_test_split. The split is done on file paths, so all
        augmented copies of one image land in the same part.
        Classes with fewer than two files go entirely to the train part.

        Returns two datasets: train_dataset and valid_dataset
        (both use the transform, image_shape and augmentations of this
        dataset).
        '''
        # Group the paths by class in one pass.
        files_by_label = {}
        for filename in self.files:
            label = filename.split('/')[-2]
            files_by_label.setdefault(label, set()).add(filename)

        train_list = []
        valid_list = []
        labels = self.le.classes_
        
        for label in labels:
            # Sorted: the split then depends only on the random state.
            class_files = sorted(files_by_label.get(str(label), ()))
            if len(class_files) < 2:
                # train_test_split cannot hold anything out here.
                train_list.extend(class_files)
                continue
            cur_train_list, cur_valid_list = train_test_split(
                class_files, train_size = train_size)
            train_list.extend(cur_train_list)
            valid_list.extend(cur_valid_list)

        train_ds = AugmentedFastDataset(mode = 'train',
                                        labels = labels,
                                        transform = self.transform,
                                        image_shape = self.image_shape,
                                        files = train_list,
                                        augmentations = self.augmentations)

        valid_ds = AugmentedFastDataset(mode = 'valid',
                                        labels = labels,
                                        transform = self.transform,
                                        image_shape = self.image_shape,
                                        files = valid_list,
                                        augmentations = self.augmentations)
        return train_ds, valid_ds

### `AugmentedCustomDataset`

Версия `CustomDataset`, дополненная аугментациями

In [None]:
class AugmentedCustomDataset(Dataset):
    '''
    Lazy image dataset with augmentations.

    self.files holds (filepath, augmentation index) pairs; the image is
    opened, transformed, augmented and divided by 255 inside __getitem__.
    '''
    def __init__(self, mode = 'train', files = all_files, labels = labels,
                 transform = None,
                 image_shape = (200, 200), augmentations = None):
        '''
        mode - train/valid/test; labels are returned only for train/valid
        files - list/set with filepaths; an entry may also be a ready
                (filepath, augmentation index) pair, which is kept as is
        labels - list with all possible namelabels
        transform - proccessing of file; when it is not given,
                    default_transform is used
        image_shape - shape of result tensor
        augmentations - sequence of callables applied to the transformed
                        tensor, None meaning "no augmentation"; defaults to
                        the built-in set below
        '''
        self.mode = mode
        self.transform = transform
        self.image_shape = image_shape

        self.check_mode = self.mode in ('train', 'valid')
        
        self.le = LabelEncoder()
        self.le.fit(labels)

        # Initialize augmentation options
        if augmentations:
            self.augmentations = augmentations
        else:
            self.augmentations = [
                None, 
                tf.ColorJitter(brightness = 0.3,
                            contrast = 0.3,
                            saturation = 0.3),
                tf.RandomPosterize(bits = 2, p = 1),
                tf.RandomAdjustSharpness(sharpness_factor = 2,
                                        p = 1),
                tf.RandomEqualize(p = 1),
                tf.RandomRotation(degrees = (-20, 20)),
                tf.RandomHorizontalFlip(p = 1)
            ]
        self.augmentations_amount = len(self.augmentations)

        # Every stored entry is a (filepath, augmentation index) pair:
        # a plain path is expanded into one pair per augmentation, a ready
        # pair (as produced by train_valid_split) is kept untouched.
        self.files = []
        for entry in files:
            if isinstance(entry, str):
                self.files.extend(
                    (entry, i) for i in range(self.augmentations_amount))
            else:
                self.files.append(tuple(entry))

        self._len = len(self.files)

    def __len__(self):
        return self._len
    
    def default_transform(self, img):
        '''
        Make image resizing, and converting to tensor
        '''
        transform = tf.Compose([
            tf.Resize(self.image_shape),
            tf.PILToTensor()
        ])
        return transform(img)

    def _load_tensor(self, idx):
        '''
        Open, transform, augment and scale (divide by 255) the idx-th entry.
        '''
        filename, augment_idx = self.files[idx]
        augment = self.augmentations[augment_idx]
        with PIL.Image.open(filename) as img:
            if self.transform:
                tensor = self.transform(img)
            else:
                tensor = self.default_transform(img)

        # The augmentation runs before the division by 255, i.e. on the
        # tensor exactly as the transform produced it.
        if augment is not None:
            tensor = augment(tensor)

        return tensor / 255

    def __getitem__(self, idx):
        '''
        Returns (tensor, encoded label) in train/valid mode,
        only the tensor otherwise.
        '''
        tensor = self._load_tensor(idx)

        if self.check_mode:
            label = self.get_label(self.files[idx][0])
            return tensor, self.encode(label)
        else:
            return tensor

    def get_label(self, path):
        '''
        Name of the class of a file (its parent directory name).
        '''
        assert self.check_mode, \
        'It is not possible to get label'
        return path.split('/')[-2]

    def encode(self, str_label):
        '''
        Convert a label name to its encoded value.
        '''
        return self.le.transform([str_label])[0]

    def decode(self, num_label):
        '''
        Convert an encoded label back to its name.
        '''
        return self.le.inverse_transform([num_label])[0]

    def get_augmented_samples(self, idx):
        '''
        Tensors of all augmentations of the idx-th image.

        Relies on the entries of one image being stored contiguously, which
        holds for a dataset built from plain filepaths (not for the parts
        returned by train_valid_split).
        '''
        begin_idx = idx * self.augmentations_amount
        return [self._load_tensor(begin_idx + i)
                for i in range(self.augmentations_amount)]
    
    def draw_augmented_samples(self, idx):
        '''
        Plot all augmentations of the idx-th image in one row.
        '''
        samples = self.get_augmented_samples(idx)
        plt.figure(figsize = (20, 20))
        for i, sample in enumerate(samples):
            plt.subplot(1, len(samples), i + 1)
            plt.imshow(sample.permute(1, 2, 0))
    
    def analyze_splitting(self):
        '''
        Count stored entries per class.

        Returns a DataFrame indexed by label with a single column 'Amount'.
        '''
        for_plot = {}
        # Entries are (filepath, augmentation index) pairs.
        for filename, _ in self.files:
            label = self.get_label(filename)
            for_plot[label] = for_plot.get(label, 0) + 1
        for_plot = pd.DataFrame.from_dict(for_plot, orient = 'index',
                                          columns = ['Amount'])
        return for_plot

    def train_valid_split(self, train_size = 0.9):
        '''
        Uniform split of files: every class is split separately with
        sklearn's train_test_split. The split is done on file paths, so all
        augmented copies of one image land in the same part.
        Classes with fewer than two files go entirely to the train part.

        Returns two datasets: train_dataset and valid_dataset
        (both use the transform, image_shape and augmentations of this
        dataset).
        '''
        # Unique paths of every class, collected in one pass.
        names_by_label = {}
        for filename, _ in self.files:
            label = filename.split('/')[-2]
            names_by_label.setdefault(label, set()).add(filename)

        train_names = set()
        valid_names = set()
        labels = self.le.classes_
        
        for label in labels:
            # Sorted: the split then depends only on the random state.
            class_names = sorted(names_by_label.get(str(label), ()))
            if len(class_names) < 2:
                # train_test_split cannot hold anything out here.
                train_names.update(class_names)
                continue
            cur_train, cur_valid = train_test_split(class_names,
                                                    train_size = train_size)
            train_names.update(cur_train)
            valid_names.update(cur_valid)

        train_list = [entry for entry in self.files
                      if entry[0] in train_names]
        valid_list = [entry for entry in self.files
                      if entry[0] in valid_names]

        # The entries are ready pairs, so the constructor keeps them as is.
        train_ds = AugmentedCustomDataset(mode = 'train',
                                          labels = labels,
                                          transform = self.transform,
                                          image_shape = self.image_shape,
                                          files = train_list,
                                          augmentations = self.augmentations)

        valid_ds = AugmentedCustomDataset(mode = 'valid',
                                          labels = labels,
                                          transform = self.transform,
                                          image_shape = self.image_shape,
                                          files = valid_list,
                                          augmentations = self.augmentations)
        return train_ds, valid_ds

### `AdvancedCustomDataset`

В общем-то, стало понятно, что обучение даже при использовании `AugmentedCustomDataset` не является эффективным, так как переобучение появляется уже на ранних этапах. Давайте доработаем `AugmentedCustomDataset` таким образом: теперь мы будем применять не фиксированный список возможных трансформаций, а будем дополнять уже существующее множество фотографий случайными комбинациями трансформаций до определённого порога, и будем делать это для каждого класса. В результате планируется получить набор, который будет содержать одно и то же количество фотографий для каждого класса.

In [None]:
class AdvancedCustomDataset(Dataset):
    '''
    Lazy image dataset that is padded with randomly augmented samples up to
    `ex_amount` entries per class.

    self.files holds (filepath, augmentation) pairs, where augmentation is
    an albumentations Compose or None; the image is read with cv2 and
    processed inside __getitem__.
    '''
    def __init__(self, augmentate, files = all_files, labels = labels, 
                 ex_amount = 1000, mode = 'train', transform = None,
                 image_shape = (200, 200), augmentations = None):
        '''
        augmentate - when true, `files` (plain filepaths) are replaced by
                     ex_amount randomly augmented entries per class
        ex_amount - number of photo per class
        mode - train/valid/test; labels are returned only for train/valid
        files - list/set with filepaths; with augmentate = False an entry
                may also be a ready (filepath, augmentation) pair
        labels - list with all possible namelabels
        transform - proccessing of file, called as
                    transform(image = img)['image']; defaults to
                    A.Normalize + A.Resize(*image_shape)
        image_shape - shape of result tensor
        augmentations - pool of albumentations transforms to sample from;
                        defaults to the built-in set below
        '''
        self.mode = mode
        self.transform = transform \
        if transform \
        else A.Compose([A.Normalize(),
                        A.Resize(*image_shape)])
        self.image_shape = image_shape
        self.ex_amount = ex_amount
        self.check_mode = self.mode in ('train', 'valid')
        self.le = LabelEncoder()
        self.le.fit(labels)
        # Created once instead of on every __getitem__ call.
        self._to_tensor = Ap.ToTensorV2()

        # Initialize augmentation options
        if augmentations:
            self.augmentations = augmentations
        else:
            self.augmentations = (
                A.ColorJitter(brightness = 0.3,
                              contrast = 0.3,
                              saturation = 0.3),
                A.Posterize(num_bits = 2, p = 1),
                A.Sharpen(alpha = (0.9, 1.0)),
                A.Equalize(p = 1),
                A.Rotate(limit = (-20, 20), p = 1),
                A.HorizontalFlip(p = 1)
        )
        self.augmentations_amount = len(self.augmentations)
        self.files = list(files)
        if augmentate:
            self.files = self.augmentate()
        else:
            # __getitem__ unpacks (filepath, augmentation) pairs, so plain
            # paths are wrapped and ready pairs are kept untouched.
            self.files = [(entry, None) if isinstance(entry, str)
                          else tuple(entry)
                          for entry in self.files]

        self._len = len(self.files)

    def augmentate(self):
        '''
        Build the augmented entry list: augmentate_one_class for every
        known label, concatenated in label order.
        '''
        labels = self.le.classes_
        new_files = []
        for label in labels:
            new_files_for_label = self.augmentate_one_class(label)
            new_files.extend(new_files_for_label)
        return new_files

    def augmentate_one_class(self, label):
        '''
        Sample self.ex_amount (filepath, augmentation) entries for a class.

        Each entry is a random file of the class with a random subset of
        0 .. augmentations_amount - 1 transforms from the pool (None when
        the subset is empty). Returns [] for a class without files.
        '''
        ex_amount = self.ex_amount
        files = self.get_class_samples(label)
        if not files:
            # np.random.choice raises on an empty sequence.
            return []
        new_files = []
        while len(new_files) < ex_amount:
            filename = str(np.random.choice(files))
            # NOTE(review): `high` is exclusive, so the whole pool is never
            # applied at once - confirm that this is intended.
            subset_size = np.random.randint(low = 0,
                                            high = self.augmentations_amount)
            if subset_size:
                # Indices are sampled instead of the transform objects so
                # that numpy never has to convert them to an array.
                chosen = np.random.choice(self.augmentations_amount,
                                          size = subset_size,
                                          replace = False)
                augmentations = A.Compose(
                    [self.augmentations[i] for i in chosen])
                new_files.append((filename, augmentations))
            else:
                new_files.append((filename, None))
        return new_files

    def get_class_samples(self, label):
        '''
        Filepaths from self.files that contain `label` as a path component.
        Expects self.files to hold plain paths (i.e. before augmentation).
        '''
        return [filename
        for filename in self.files if label in filename.split('/')]

    def __len__(self):
        return self._len

    @staticmethod
    def _read_rgb(filename):
        '''
        Read an image with cv2 and convert it from BGR to RGB.
        '''
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread returns None instead of raising on failure.
            raise FileNotFoundError(f'Cannot read image: {filename}')
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    def __getitem__(self, idx):
        '''
        Returns (tensor, encoded label) in train/valid mode,
        only the tensor otherwise.
        '''
        # Find path to file depending on idx
        filename, augmentations = self.files[idx]
        img = self._read_rgb(filename)
        # Augmentations run on the raw uint8 image, before self.transform:
        # Posterize and Equalize accept uint8 input only, while the default
        # transform (A.Normalize) outputs floats.
        if augmentations is not None:
            img = augmentations(image = img)['image']
        tensor = self.transform(image = img)['image']

        # NOTE(review): with the default transform the image is already
        # normalized by A.Normalize, so this division scales it a second
        # time; kept to preserve the existing input scale - confirm.
        tensor = tensor / 255
        tensor = self._to_tensor(image = tensor)['image'].float()

        if self.check_mode:
            label = self.get_label(filename)
            return tensor, self.encode(label)
        else:
            return tensor

    def get_augmented_samples(self, idx):
        '''
        Method to get all augmentations with the same image
        idx - index in self.files

        Returns the matching (filepath, augmentation) entries.
        '''
        filename = self.files[idx][0]
        answer = [item for item in self.files
                  if filename == item[0]]
        return answer

    def draw_augmented_samples(self, idx):
        '''
        Plot every augmented version of the image behind self.files[idx]
        on a grid with 5 columns (augmentations are applied to the raw
        image, self.transform is not used here).
        '''
        files = self.get_augmented_samples(idx)
        columns = 5
        number = len(files)
        # Ceiling division: rows needed for `number` plots.
        lines = -(-number // columns)
        print(f'{number}: {lines}:{columns}')
        plt.figure(figsize = (20, 20))
        for position, item in enumerate(files):
            filename, augmentation = item
            img = self._read_rgb(filename)
            if augmentation is not None:
                img = augmentation(image = img)['image']
            plt.subplot(lines, columns, position + 1)
            plt.imshow(img)

    def get_label(self, path):
        '''
        Name of the class of a file (its parent directory name).
        '''
        assert self.check_mode, \
        'It is not possible to get label'
        return path.split('/')[-2]

    def encode(self, str_label):
        '''
        Convert a label name to its encoded value.
        '''
        return self.le.transform([str_label])[0]

    def decode(self, num_label):
        '''
        Convert an encoded label back to its name.
        '''
        return self.le.inverse_transform([num_label])[0]

    def train_valid_split(self, train_size = 0.9):
        '''
        Uniform split of files: every class is split separately with
        sklearn's train_test_split. The split is done on file paths, so all
        augmented copies of one image land in the same part.
        Classes with fewer than two files go entirely to the train part.

        Returns two datasets: train_dataset and valid_dataset
        (both reuse the entries, transform, ex_amount, image_shape and
        augmentations of this dataset; no new augmentation is sampled).
        '''
        # Unique paths of every class, collected in one pass.
        names_by_label = {}
        for filename, _ in self.files:
            label = filename.split('/')[-2]
            names_by_label.setdefault(label, set()).add(filename)

        train_names = set()
        valid_names = set()
        labels = self.le.classes_
        
        for label in labels:
            # Sorted: the split then depends only on the random state.
            class_names = sorted(names_by_label.get(str(label), ()))
            if len(class_names) < 2:
                # train_test_split cannot hold anything out here.
                train_names.update(class_names)
                continue
            cur_train, cur_valid = train_test_split(class_names,
                                                    train_size = train_size)
            train_names.update(cur_train)
            valid_names.update(cur_valid)

        train_list = [entry for entry in self.files
                      if entry[0] in train_names]
        valid_list = [entry for entry in self.files
                      if entry[0] in valid_names]

        train_ds = AdvancedCustomDataset(augmentate = False, mode = 'train',
                                         labels = labels,
                                         ex_amount = self.ex_amount,
                                         transform = self.transform,
                                         image_shape = self.image_shape,
                                         files = train_list,
                                         augmentations = self.augmentations)

        valid_ds = AdvancedCustomDataset(augmentate = False, mode = 'valid',
                                         labels = labels,
                                         ex_amount = self.ex_amount,
                                         transform = self.transform,
                                         image_shape = self.image_shape,
                                         files = valid_list,
                                         augmentations = self.augmentations)
        return train_ds, valid_ds

In [None]:
# Example instance: 500 augmented entries per class, images resized to 100x100.
adc = AdvancedCustomDataset(augmentate=True,
                            ex_amount = 500,
                            image_shape = (100, 100))

{}

### Сравнение

In [None]:
def memory_counter(ex, all = False):
    '''
    Sum sys.getsizeof over the values stored in ex.__dict__.

    ex - existing class instance
    all - when true every attribute is counted, otherwise attributes whose
          name starts with '_' are left out

    Returns the total in bytes. sys.getsizeof is shallow: the contents of
    containers are not followed.
    '''
    total = 0
    for name, value in ex.__dict__.items():
        if all or not name.startswith('_'):
            total += sys.getsizeof(value)
    return total

def dataset_metric(cls, print_info = True, **kwargs):
    '''
    Comparing of classes with datasets: init, traverse, memory

    cls - dataset class (or factory function) to measure
    print_info - print the memory / per-element summary
    kwargs - passed to cls

    Returns a tuple (name, time to init, time to traverse, memory), times in
    seconds and memory in bytes as reported by memory_counter.
    '''
    print(f'Class name: {cls.__name__}')
    # perf_counter is a monotonic clock meant for measuring intervals.
    begin = time.perf_counter()
    ex = cls(**kwargs)
    to_init = time.perf_counter() - begin
    print('Time to init: {:.5f} s'.format(to_init))
    begin = time.perf_counter()
    for _ in ex:
        pass
    to_traverse = time.perf_counter() - begin
    print('Time to traverse: {:.5f} s'.format(to_traverse))
    memory = memory_counter(ex)
    if print_info:
        total = len(ex)
        # An empty dataset must not end in a division by zero.
        mean_time = to_traverse / total if total else 0.0
        mean_memory = memory / total if total else 0.0
        info = '\n'.join(['Memory: {} bytes = {:.3f} MB',
                        'Total elements: {} elements',
                        'Mean iteration time: {:.4f} s',
                        'Mean memory usage per element: {:.4f} bytes',
                         '']).format(memory, memory / 10 ** 6, 
                                     total, 
                                     mean_time,
                                     mean_memory)
        print(info)
    
    return (cls.__name__, to_init, to_traverse, memory)

In [None]:
# Benchmark AdvancedCustomDataset with augmentation enabled (other arguments default).
advds_metric = dataset_metric(AdvancedCustomDataset, augmentate = True)

Class name: AdvancedCustomDataset
Time to init: 22.53049 s
Time to traverse: 1930.73612 s
Memory: 1764466 bytes = 1.764 MB
Total elements: 210000 elements
Mean iteration time: 0.0092 s
Mean memory usage per element: 8.4022 bytes



In [None]:
# Benchmark CustomDataset with its default arguments.
cds_metric = dataset_metric(CustomDataset)

Class name: CustomDataset
Time to init: 0.00100 s
Time to traverse: 126.87075 s
Memory: 84378 bytes = 0.084 MB
Total elements: 10515 elements
Mean iteration time: 0.0121 s
Mean memory usage per element: 8.0245 bytes



In [None]:
# Benchmark FastDataset with its default arguments.
fds_metric = dataset_metric(FastDataset)

Class name: FastDataset
Time to init: 131.19805 s
Time to traverse: 3.40911 s
Memory: 170610 bytes = 0.171 MB
Total elements: 10515 elements
Mean iteration time: 0.0003 s
Mean memory usage per element: 16.2254 bytes



In [None]:
# Benchmark the torchvision DatasetFolder built by make_DatasetFolder.
dfds_metric = dataset_metric(make_DatasetFolder)

Class name: make_DatasetFolder
Time to init: 0.09396147727966309
Time to traverse: 127.35288572311401
Memory: 2409920 bytes = 2.410 MB
Total elements: 10534 elements
Mean iteration time: 0.012
Mean memory per element: 228.775 bytes


In [None]:
# Benchmark AugmentedCustomDataset with its default arguments.
acds_metric = dataset_metric(AugmentedCustomDataset)

Class name: AugmentedCustomDataset
Time to init: 0.020332813262939453
Time to traverse: 1068.542881011963
Memory: 6457296 bytes = 6.457 MB
Total elements: 73605 elements
Mean iteration time: 0.015
Mean memory per element: 87.729 bytes


### Выводы
Тут надо подумать и выбрать, пока буду юзать Fast.

Стоит ещё заметить вот что: при работе с `torchvision.datasets.DatasetFolder` метки классов кодируются иначе, нежели в остальных классах. Соответственно, если обучить сетку, а потом поменять тип используемого датасета, то будет плохо. Поэтому на этом мы прощаемся с этой штукой. Не очень грустно, потому что в сравнении с другими вариантами она не сказать что превосходит их по времени/памяти.

В перспективе, конечно, правильнее будет использовать `CustomDataset`, потому что при расширении датасета для `FastDataset` может банально не хватить памяти. Пока же, мы будем использовать `FastDataset`, и если что, добавлю возможность переключения на `CustomDataset`.

Но как выяснилось, данных слишком мало, поэтому их пришлось аугментировать. Создал два новых класса: `AugmentedCustomDataset` - аналог `CustomDataset` с возможностью аугментации, и `AugmentedFastDataset` - аналог `FastDataset` с возможностью аугментации.

Далее, будем использовать `AugmentedCustomDataset`, потому что быстрый аналог банально перестал влезать в память. Но придется потерять во времени..(

Также, во время прогонки `AdvancedCustomDataset` выяснилось, что методы аугментации из `torchvision.transforms` работают несколько дольше, нежели аналоги из `albumentations`. Поэтому в `AdvancedCustomDataset` будут использоваться методы модуля `albumentations`.

А ещё оказалось, что считывание через `cv2.imread` работает быстрее, чем через `PIL.Image.open`.

## Разбиение данных

Это я выделил в отдельный раздельчик, причина, как по мне, существенная: картинок очень мало (по 50 на класс), классов очень много (210), и хотелось бы проконтролировать, чтобы в тренировочной выборке был баланс классов. Я думаю, что в тренировочную выборку мы закинем 90% (для начала, дальше видно будет). Возможно, придётся обучать на всей выборке, а потом валидироваться на каком-то подмножестве тренировочной выборки.

In [None]:
def make_loaders(ds_cls, train_size, train_bs, valid_bs, ds_params,
                 num_workers = 1):
    '''
    Build the train and valid DataLoaders for one dataset.

    ds_cls - class of the dataset to use; it is instantiated as
             ds_cls(**ds_params) and must provide
             train_valid_split(train_size = ...) returning a
             (train, valid) pair of datasets
    train_size - forwarded unchanged to train_valid_split
    train_bs - batch size of the train loader
    valid_bs - batch size of the valid loader
    ds_params - dict with keyword arguments of the dataset constructor
    num_workers - worker processes per loader (default 1, the value that
                  used to be hard-coded)

    Return two DataLoaders: train (shuffled) and valid (not shuffled)
    '''

    ds = ds_cls(**ds_params)
    train_ds, valid_ds = ds.train_valid_split(train_size = train_size)

    # Only the train loader shuffles; validation keeps the dataset order.
    train_dl = DataLoader(train_ds, batch_size = train_bs,
                          shuffle = True, num_workers = num_workers)

    valid_dl = DataLoader(valid_ds, batch_size = valid_bs,
                          shuffle = False, num_workers = num_workers)

    return train_dl, valid_dl

## Цикл обучения с валидацией

In [None]:
def train_valid(model, train_dl, valid_dl,
                opt_cls, opt_params, loss_fn, 
                metric_fn, max_epochs:int,
                device, exp_name,
                scheduler_cls = None, scheduler_params = None):
    '''
    Train and validation cycle.

    model - network to train; it is moved to `device`
    train_dl - DataLoader with train data
    valid_dl - DataLoader with valid data
    opt_cls - optimizer class, built as opt_cls(params = ..., **opt_params)
    opt_params - dict with optimizer keyword arguments (None means no extras)
    loss_fn - loss function, called as loss_fn(output, y)
    metric_fn - metric function to evaluate model, called as
                metric_fn(y_true, y_pred, average = 'macro') on CPU tensors
    max_epochs - epochs to training and validation
    device - device used for the model and for every batch
    exp_name - experiment name; the best weights are saved to
               './models/models/<exp_name>.pth'
    scheduler_cls - class of scheduler (optional)
    scheduler_params - dict with scheduler keyword arguments
                       (None means no extras)

    Return (train_losses, valid_losses, train_metric, valid_metric,
    train_time, valid_time). The four lists hold one value per epoch: the
    mean over batches of the per-batch loss / metric. The times are the
    accumulated seconds spent in the train and valid loops.
    '''
    train_losses = []
    valid_losses = []
    train_metric = []
    valid_metric = []

    def print_loss_metric_info():
        '''
        Logger function: print the per-epoch histories collected so far.
        '''
        template = '\n'.join(['',
                              'Losses on train: {}',
                              'Losses on valid: {}',
                              'Metric on train: {}',
                              'Metric on valid: {}'])
        print(template.format(train_losses,
                               valid_losses,
                               train_metric,
                               valid_metric))

    # Optimizer initialization
    opt = opt_cls(params = model.parameters(),
                  **(opt_params or {}))

    # Scheduler initialization; a missing params dict must not crash on `**None`
    if scheduler_cls:
        scheduler = scheduler_cls(optimizer = opt,
                                  **(scheduler_params or {}))
    else:
        scheduler = None

    # Make sure the checkpoint directory exists before any training time is
    # spent, otherwise the first torch.save would fail after a full epoch.
    checkpoint_path = './models/models/' + exp_name + '.pth'
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok = True)

    model = model.to(device)
    train_time = 0
    valid_time = 0
    for epoch in tqdm(range(max_epochs), desc = 'Epoch'):
        # Training cycle
        model.train()
        train_losses_epoch = []
        train_metric_epoch = []
        print_loss_metric_info()
        begin_time = time.time()
        for x, y in tqdm(train_dl):
            opt.zero_grad()
            x, y = x.to(device), y.to(device)
            output = model(x)
            # Predicted class = index of the largest output along the last axis
            y_pred = torch.argmax(output, dim = -1)

            loss = loss_fn(output, y)
            loss.backward()
            opt.step()
            metric_value = metric_fn(y.to('cpu'), y_pred.to('cpu'), average = 'macro')
            train_metric_epoch.append(metric_value)
            train_losses_epoch.append(loss.item())
        train_time += (time.time() - begin_time)
        train_losses.append(np.mean(train_losses_epoch))
        train_metric.append(np.mean(train_metric_epoch))

        # Valid cycle
        model.eval()
        valid_losses_epoch = []
        valid_metric_epoch = []
        print_loss_metric_info()
        begin_time = time.time()
        with torch.no_grad():
            for x, y in tqdm(valid_dl):
                x, y = x.to(device), y.to(device)
                output = model(x)
                y_pred = torch.argmax(output, dim = -1)

                loss = loss_fn(output, y)

                metric_value = metric_fn(y.to('cpu'), y_pred.to('cpu'), average = 'macro')
                valid_losses_epoch.append(loss.item())
                valid_metric_epoch.append(metric_value)

        valid_metric.append(np.mean(valid_metric_epoch))
        valid_losses.append(np.mean(valid_losses_epoch))
        valid_time += (time.time() - begin_time)

        # Saving model params: keep the weights of the best valid epoch so far
        # (a tie with the previous best overwrites the file).
        if valid_metric[-1] == max(valid_metric):
            torch.save(model.state_dict(), checkpoint_path)
        if scheduler:
            scheduler.step()

        # Wipe the notebook cell output so progress bars do not pile up
        clear_output()

    return train_losses, valid_losses, train_metric, valid_metric, train_time, valid_time

## Написание моделей

В качестве моделей мы будем использовать сверточные нейронные сети и различные ансамбли.

### Сверточные сети

Для перебора архитектур не будем заводить отдельного класса, а напишем один раз шаблон и будем его менять прям в коде, потому что далее все равно будет выполняться сохранение моделей.

In [None]:
class Net(nn.Module):
    '''
    Baseline CNN classifier: six conv + ReLU stages (max-pooling after
    stages 1, 3, 5; batchnorm after stages 2, 4), then a three-layer
    fully connected head that returns raw class scores (no softmax).

    The head expects 256 * 6 * 6 = 9216 features. With the conv stack
    below that feature-map size is only reached for inputs of roughly
    94-101 px per side, so an adaptive average pooling to 6x6 is applied
    before flattening. It has no parameters (old state dicts still load)
    and is the identity when the map is already 6x6.
    '''
    # NOTE: the default n_classes is evaluated once, when the class is
    # defined, from the module-level `labels`.
    def __init__(self, n_classes = len(labels)):
        super().__init__()
        
        self.n_classes = n_classes

        self.conv1 = nn.Conv2d(3, 8, 7)
        self.c_act1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2)

        self.conv2 = nn.Conv2d(8, 32, 3)
        self.c_act2 = nn.ReLU()
        self.bn1 = nn.BatchNorm2d(32)

        self.conv3 = nn.Conv2d(32, 64, 3)
        self.c_act3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(2)
        
        self.conv4 = nn.Conv2d(64, 128, 3)
        self.c_act4 = nn.ReLU()
        self.bn2 = nn.BatchNorm2d(128)

        self.conv5 = nn.Conv2d(128, 256, 3)
        self.c_act5 = nn.ReLU() 
        self.pool5 = nn.MaxPool2d(2)

        self.conv6 = nn.Conv2d(256, 256, 3)
        self.c_act6 = nn.ReLU()

        # Forces a 6x6 feature map so that the flattened size is always
        # 256 * 6 * 6 = 9216, whatever the input resolution.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((6, 6))

        self.flattener = nn.Flatten()

        self.bn3 = nn.BatchNorm1d(9216)

        self.linear1 = nn.Linear(9216, 4096)
        self.l_act1 = nn.ReLU()

        self.linear2 = nn.Linear(4096, 1024)
        self.l_act2 = nn.ReLU()

        self.linear3 = nn.Linear(1024, n_classes)

    def forward(self, x):
        '''
        Run a batch of 3-channel images through the network and return
        the output of the last linear layer (n_classes values per image).
        '''
        x = self.pool1(self.c_act1(self.conv1(x)))
        
        x = self.c_act2(self.conv2(x))
        x = self.bn1(x)
        
        x = self.pool3(self.c_act3(self.conv3(x)))
        
        x = self.c_act4(self.conv4(x))
        x = self.bn2(x)
        
        x = self.pool5(self.c_act5(self.conv5(x)))
        
        x = self.c_act6(self.conv6(x))

        x = self.adaptive_pool(x)
        x = self.flattener(x)
        x = self.bn3(x)
        
        x = self.l_act1(self.linear1(x))
        
        x = self.l_act2(self.linear2(x))
        
        x = self.linear3(x)
        return x


model = Net()

In [None]:
def train_valid_save(model, artifact_config,
                     preprocess_config,
                     train_config):
    '''
    Run one experiment and log it to wandb as an artifact.

    model - network to train
    artifact_config - dict with keyword arguments of wandb.Artifact;
                      its 'name' is also used as the experiment name
    preprocess_config - dict with keyword arguments of make_loaders
    train_config - dict with keyword arguments of train_valid (without
                   model, train_dl, valid_dl and exp_name); must contain
                   'max_epochs' and 'device'

    The artifact receives: the saved state dict, a table with per-epoch
    losses and scores, a text description of the model, and the three
    configs (preprocess / training / resulting) as metadata.
    Returns nothing.
    '''

    art = wandb.Artifact(**artifact_config)
    exp_name = artifact_config['name']
    print('Making dataloaders...')
    train_dl, valid_dl = make_loaders(**preprocess_config)
    clear_output()
    train_losses, valid_losses, train_metric, valid_metric, train_time, valid_time = train_valid(
        model = model,
        train_dl = train_dl,
        valid_dl = valid_dl,
        exp_name = exp_name,
        **train_config
    )

    # One table row per epoch: (epoch number, losses, scores)
    epochs = train_config['max_epochs']
    for_table = list(zip(range(1, epochs + 1), 
                         train_losses,
                         valid_losses,
                         train_metric,
                         valid_metric)) 
    
    tabled_cfg = wandb.Table(
        columns = ['Epoch', 'Train losses', 'Valid losses', 'Train score', 'Valid score'],
        data = for_table
    )

    # Model state dict (written by train_valid for the best valid epoch)
    art.add_file('./models/models/' + exp_name + '.pth',
                 name = 'state_dict.pth')
    
    # Losses and metrics
    art.add(tabled_cfg, 'Losses and scores table')

    # Add result description. The device is taken from train_config, i.e.
    # the one the training actually ran on, not from a global variable.
    result_config = {'Train time': train_time,
                     'Valid time': valid_time,
                     'Device': train_config['device']}

    # Add configuration
    common_config = {'Preprocess': preprocess_config,
                     'Training': train_config,
                     'Resulting': result_config}

    art.metadata = common_config

    # Human-readable architecture dump (the module's string representation)
    desc_path = './models/desc/' + exp_name + '.txt'
    os.makedirs(os.path.dirname(desc_path), exist_ok = True)
    with open(desc_path, 'w') as f:
        f.write(str(model))

    art.add_file(desc_path,
                 name = 'desc.txt')

    wandb.log_artifact(art)

In [None]:
# Keyword arguments of the dataset constructor (passed on as ds_params).
dataset_config = dict(
    augmentate = True,
    ex_amount = 2000,
    image_shape = (224, 224),
)


# Keyword arguments of make_loaders.
preprocess_config = dict(
    ds_cls = AdvancedCustomDataset,
    ds_params = dataset_config,
    train_bs = 128,
    valid_bs = 256,
    train_size = 0.9,
)

# Keyword arguments of train_valid (model, loaders and exp_name are
# supplied separately by the caller).
train_config = dict(
    opt_cls = torch.optim.Adam,
    loss_fn = nn.CrossEntropyLoss(),
    metric_fn = f1_score,
    max_epochs = 10,
    opt_params = dict(lr = 5e-4),
    scheduler_cls = torch.optim.lr_scheduler.StepLR,
    scheduler_params = dict(step_size = 4, gamma = 0.55),
    device = device,
)

# Keyword arguments of wandb.Artifact; 'name' doubles as the experiment name.
artifact_config = dict(
    name = 'CNN_v.1',
    type = 'model',
    description = 
    '''Using advanced dataset;
    ''',
)

In [None]:
# Run the experiment described by the three configs above and log it to wandb.
train_valid_save(model, artifact_config, preprocess_config, train_config)

In [None]:
!nvidia-smi

Thu Apr 20 20:42:29 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   50C    P8    10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Finish the current wandb run once the experiment has been logged.
wandb.finish()

Я пытался поменять способ решейпинга, но время обучения увеличилось в 2.5 раза.
Возможно, это из-за weight decay.

Добавить батчнорм

ArcFace?

Лоссы вообще посмотреть

Гугл датасет сохранил на кагл

Файнтьюн?

## Файнтьюн

VGG13

In [None]:
import torchvision.models as models

# Pass an explicit weights enum member. Handing over the enum class itself
# is only accepted through the deprecated legacy path (treated like
# pretrained=True, with a warning). IMAGENET1K_V1 is the vgg13_bn checkpoint
# that path resolves to, so the loaded weights stay the same.
vgg13 = models.vgg13_bn(weights = models.VGG13_BN_Weights.IMAGENET1K_V1)

Downloading: "https://download.pytorch.org/models/vgg13_bn-abd245e5.pth" to /root/.cache/torch/hub/checkpoints/vgg13_bn-abd245e5.pth
100%|██████████| 508M/508M [00:03<00:00, 167MB/s]


In [None]:
# Freeze the feature extractor: none of its parameters will receive gradients.
for x in vgg13.features.parameters():
    x.requires_grad_(False)

In [None]:
# Swap two linear layers of the classifier so that it narrows 4096 -> 1024
# and then maps 1024 features to one score per label.
vgg13.classifier[-4] = nn.Linear(4096, 1024)
vgg13.classifier[-1] = nn.Linear(1024, len(labels))
vgg13

In [None]:
# Fine-tune VGG13 with the same data and training settings as the custom CNN.
train_dl, valid_dl = make_loaders(**preprocess_config)
(train_losses, valid_losses,
 train_metric, valid_metric,
 train_time, valid_time) = train_valid(model = vgg13, train_dl = train_dl,
                                       valid_dl = valid_dl, exp_name = 'Vgg13',
                                       **train_config)

# TODO:
<font color='red'>


## Ускорение инференса и обучение

*    Добавить батчнорм
*    Прунинг (надо чтоб она хоть какое то качество показала)))

## Интерактивное создание сетей

Тут я пытался сделать небольшую утилку, которая всякими слайдерами и дропдаунами могла бы генерить слои нейронной сети. Просто по приколу.

In [None]:
def selector(**kwargs):
    '''
    Build an input form for the layer type given in kwargs['layer_type'],
    display it, print it and return it.

    Each branch wires one of the local factory functions below to a set of
    input widgets whose values become the keyword arguments of that factory.

    NOTE(review): `widgets` is not imported in the visible part of the file,
    presumably it is ipywidgets - confirm. `display` is used without an
    import as well (presumably the notebook builtin).
    NOTE(review): there is no branch for an unknown layer_type, so `w` would
    be unbound at `display(w)` in that case.
    '''

    # Factory: convolution layer from the form values.
    def conv(**kwargs):
        return nn.Conv2d(**kwargs)
    
    # Factory: fully connected layer from the form values.
    def linear(**kwargs):
        return nn.Linear(**kwargs)

    # Factory: activation chosen by name; any other name returns None.
    def activation(**kwargs):
        cls = kwargs['cls']
        if cls == 'ReLU':
            return nn.ReLU()
        if cls == 'Tanh':
            return nn.Tanh()
        if cls == 'Sigmoid':
            return nn.Sigmoid()
    
    # Factory: dropout layer from the form values.
    def dropout(**kwargs):
        return nn.Dropout1d(**kwargs)

    # Factory: pooling layer. Only 'max' is implemented; the form below also
    # offers 'avg' and 'min', for which this function returns None.
    def pooling(**kwargs):
        pool_type = kwargs['pool_type']
        # pool_type is a selector, not an argument of the pooling layer
        del kwargs['pool_type']
        if pool_type == 'max':
            return nn.MaxPool2d(**kwargs)
    
    layer = kwargs['layer_type']
    if layer == 'Conv2d':
        w = widgets.interact_manual(conv,
                                    in_channels = widgets.IntText(value = 1),
                                   out_channels = widgets.IntText(value = 1),
                                   kernel_size = widgets.IntText(value = 1),
                                   padding = widgets.IntText(value = 0),
                                   stride = widgets.IntText(value = 1))
    
    elif layer == 'Linear':
        w = widgets.interactive(linear, {'manual': True, 'auto_display': True},
                                   in_features = widgets.IntText(value = 1),
                                   out_features = widgets.IntText(value = 1))
        # Debug print of the created form
        print(f'Linear {w}')

    elif layer == 'Activation':
        w = widgets.interactive(activation, 
                                {'manual': True, 'auto_display': True},
                                cls = ['ReLU', 'Tanh', 'Sigmoid'])
        # Debug print of the created form
        print(f'ww - {w}')

    elif layer == 'Dropout':
        w = widgets.interact_manual(dropout,
                                   p = widgets.FloatText(value = 0.5,
                                                         min = 0,
                                                         max = 1))
    
    elif layer == 'Pooling':
        w =  widgets.interact_manual(pooling,
                                 pool_type = ['max', 'avg', 'min'],
                                 kernel_size = widgets.IntText(value = 1),
                                 padding = widgets.IntText(value = 0),
                                 stride = widgets.IntText(value = 1))
        
    display(w)
    print(f'w = {w}')
    return w


# Top-level form: a choice of layer type that drives `selector`.
result = widgets.interactive(selector, layer_type = ['Conv2d',
                                                  'Pooling',
                                                  'Linear',
                                                  'Activation',
                                                  'Dropout'])

In [None]:
# Show the top-level form and inspect the widgets it is composed of.
display(result)
z = result.children
print(f'z = {z}')

interactive(children=(Dropdown(description='layer_type', options=('Conv2d', 'Pooling', 'Linear', 'Activation',…

z = (Dropdown(description='layer_type', options=('Conv2d', 'Pooling', 'Linear', 'Activation', 'Dropout'), value='Conv2d'), Output())


In [None]:
class ConvLayer(nn.Module):
    '''
    Convolutional building block: conv -> (pooling) -> (activation),
    stored as one nn.Sequential in `self.conv_layer`.
    '''
    # Running counter shared by all instances; it is used as a suffix of the
    # sub-module names and is incremented after every construction.
    _number = 1
    def __init__(self, conv_params, conv_cls = nn.Conv2d,
                 pooling_cls = None, pooling_params = None,
                 activation_cls = None, activation_params = None):
        '''
        Create conv layer: conv2d->pooling->activation
        *_params - dict with layer params; None means "no keyword
                   arguments" (same convention as in FCLayer)
        *_cls - class of layer; pooling and activation are skipped when
                their class is not given
        '''
        super().__init__()
        number = ConvLayer._number

        self.conv_layer = nn.Sequential()
        self.conv_layer.add_module(f'Conv_{number}',
                                   conv_cls(**conv_params))

        if pooling_cls:
            # `or {}` keeps a missing params dict from crashing on `**None`
            pooling_layer = pooling_cls(**(pooling_params or {}))
            self.conv_layer.add_module(f'Pooling_{number}', pooling_layer)

        if activation_cls:
            activation = activation_cls(**(activation_params or {}))
            self.conv_layer.add_module(f'Activation_{number}', activation)

        ConvLayer._number += 1

    def forward(self, x):
        '''Apply the assembled sequence to x.'''
        return self.conv_layer(x)

In [None]:
class FCLayer(nn.Module):
    '''
    Fully connected building block: (dropout) -> linear -> (activation),
    stored as one nn.Sequential in `self.fc_layer`.
    '''
    # Running counter shared by all instances; it is used as a suffix of the
    # sub-module names and is incremented after every construction.
    _number = 1
    def __init__(self, linear_params, linear_cls = nn.Linear,
                 dropout_cls = None, dropout_params = None,
                 activation_cls = None, activation_params = None):
        '''
        Create FC-layer: dropout->linear->activation
        If the current layer is the last one, the activation can be
        replaced with something like Softmax, etc.

        *_params - dict with layer params; None means "no keyword arguments"
        *_cls - class of layer; dropout and activation are skipped when
                their class is not given
        '''

        super().__init__()
        self.fc_layer = nn.Sequential()
        number = FCLayer._number

        if dropout_cls:
            # `or {}` keeps a missing params dict from crashing on `**None`
            dropout_layer = dropout_cls(**(dropout_params or {}))
            self.fc_layer.add_module(f'Dropout_{number}', dropout_layer)

        linear = linear_cls(**linear_params)
        self.fc_layer.add_module(f'Linear_{number}', linear)

        if activation_cls:
            activation = activation_cls(**(activation_params or {}))
            self.fc_layer.add_module(f'Activation_{number}', activation)

        FCLayer._number += 1

    def forward(self, x):
        '''Apply the assembled sequence to x.'''
        return self.fc_layer(x)

In [None]:
class MiddleLayer(nn.Module):
    '''
    Container for the layers between the conv and the FC parts
    (e.g. flatten / bottleneck). Without params it passes its input through.
    '''
    # Running counter shared by all instances; it is used as a suffix of the
    # sub-module names and is incremented after every construction.
    _number = 1
    def __init__(self, params = None):
        '''
        Create layers with middle layers
        params - tuple or list of pairs (layer_cls, dict(layer_params));
                 falsy layer_params mean "construct without arguments".
                 Any other value of params (e.g. None) creates a
                 pass-through layer.
        '''
        super().__init__()
        number = MiddleLayer._number
        if isinstance(params, (tuple, list)):
            self.mid_layer = nn.Sequential()
            # How many times each base name has been used. add_module
            # replaces a module registered under an existing name, so two
            # layers of the same class must not share one.
            used = {}
            for cls, cls_params in params:
                layer = cls(**cls_params) if cls_params else cls()

                name = f'Mid_{cls.__name__}_{number}'
                used[name] = used.get(name, 0) + 1
                if used[name] > 1:
                    # The first occurrence keeps the historical name, repeats
                    # get an extra occurrence index
                    name = f'{name}_{used[name]}'

                self.mid_layer.add_module(name, layer)
        else:
            self.mid_layer = None

        MiddleLayer._number += 1

    def forward(self, x):
        '''Apply the middle layers to x, or return x itself if there are none.'''
        if self.mid_layer:
            return self.mid_layer(x)
        else:
            return x

In [None]:
class CNN(nn.Module):
    def __init__(self, batch_norm = False,
                 conv_layers = None,
                 middle_layers = None,
                 fc_layers = None):
        '''
        Generate architecture of neural net.
        batch_norm - bool. If True, add batchnorm to input
        conv_layers - list with conv layers
        middle_layers - list with middle layers (mb bottleneck/flatten)
        fc_layers - list with fc layers
        '''

        super().__init__()
        if batch_norm:
            self.batch_norm = nn.BatchNorm2d(3) # Для RGB картинок.
        else:
            self.batch_norm = None
        self.conv_layers = nn.Sequential(*conv_layers)
        self.middle_layers = nn.Sequential(*middle_layers)
        self.fc_layers = nn.Sequential(*fc_layers)

    
    def forward(self, x):

        if self.batch_norm:
            x = self.batch_norm(x)
        
        if self.conv_layers:
            x = self.conv_layers(x)

        if self.middle_layers:
            x = self.middle_layers(x)

        if self.fc_layers:
            x = self.fc_layers(x)
        
        return x

    def readable_config(self):
        # Saving readable config
        x = cnn.children()
        ans = ''
        for y in x:
            ans += str(y) + '\n\n'
        return ans