<a href="https://colab.research.google.com/github/dvarkless/InnopolisDS/blob/main/homework/Neural_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Анализ некоторых параметров Полносвязной и Сверточной нейронной сети

In [11]:
# System and fundamental stuff
import sys
from collections import OrderedDict
import functools
import math
import time
from itertools import product

# Types
from typing import Callable
from typing import OrderedDict as OrderedDictType
from types import FunctionType

# ML stuff
import numpy as np
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torchmetrics
from torch.utils.data import DataLoader
from torchmetrics import Accuracy, Precision, Recall
from torchvision.datasets import MNIST
from torchvision.transforms import Compose, Normalize, ToTensor

# Other
from alive_progress import alive_bar

## Надстройка для nn.Sequential

In [5]:
class SeqModeler(nn.Sequential):
    def __init__(self, ord_dict: OrderedDictType, device='cpu') -> None:
        super().__init__(ord_dict)
        self.device = torch.device(device)
        self.to(self.device, non_blocking=True)

    def predict(self, X):
        X = self.forward(X)
        return torch.argmax(nn.Softmax(dim=1)(X), dim=1)


## Тренер нейронных сетей

In [16]:
class Trainer:
    __defaults = {
        'batch_size': 100,
        'device': 'cpu',
        'epochs': 20,
        'criterion': nn.CrossEntropyLoss(),
        'enable_print': False,
        'metrics': None,
    }
    __must_have_params = ['model_class', 'model_params', 'set_optimizer']

    def __init__(self, **hp) -> None:
        self.config = self.__defaults.copy()
        for name, val in hp.items():
            self.config[name] = val

        for name in self.__must_have_params:
            if name not in self.config:
                print(f'Error: config parameter "{name}" is missing')
                sys.exit(1)

        self.model = self.config['model_class'](**self.config['model_params'])
        opt_config = self.config['set_optimizer'].copy()
        opt_config['params'] = self.model.parameters()
        optimizer_name = opt_config.pop('name')
        self.optimizer = getattr(torch.optim, optimizer_name)(**opt_config)
        self.criterion = self.config['criterion']
        self.device = torch.device(self.config['device'])

    @property
    def data_batch(self):
        return self._data_batch

    @data_batch.setter
    def data_batch(self, data, /):
        if isinstance(data, torch.Tensor):
            self._data_batch = data.to(self.device, non_blocking=True).float()
        elif isinstance(data, np.ndarray):
            self.data_batch = torch.Tensor(data)
        else:
            raise ValueError(f'data of type {type(data)} is unacceptable')

    @property
    def targets_batch(self):
        return self._targets_batch

    @targets_batch.setter
    def targets_batch(self, targets):
        if isinstance(targets, torch.Tensor):
            self._targets_batch = targets.to(
                self.device, non_blocking=True)
        elif isinstance(targets, (np.ndarray, list, tuple)):
            self.targets_batch = torch.Tensor(targets)
        else:
            raise ValueError(f'data of type {type(targets)} is unacceptable')

    def fit(self, train_dataset, eval_dataset=None):
        train_dl = DataLoader(train_dataset, self.config['batch_size'])
        for epoch in range(self.config['epochs']):
            avg_loss = []
            for (inputs, targets) in train_dl:
                self.data_batch, self.targets_batch = inputs, targets
                self.optimizer.zero_grad()
                yhat = self.model(self.data_batch)
                loss = self.criterion(yhat, self.targets_batch)
                avg_loss.append(loss)
                loss.backward()
                self.optimizer.step()
            avg_loss = torch.Tensor(avg_loss).mean()
            avg_loss.to(self.device)
            if self.config['enable_print']:
                print(
                    f'==========Epoch {epoch+1}/{self.config["epochs"]}==========')
                print(f'Loss: {avg_loss}')
                if self.config['metrics'] and eval_dataset:
                    metric_data = self.evaluate(eval_dataset)
                    for metric, data in zip(self.config['metrics'], metric_data):
                        print(f'{metric.__class__.__name__} = {data:.3f}')
        return self

    def evaluate(self, eval_dataset):
        eval_dl = DataLoader(eval_dataset, batch_size=10000)
        for data, targets in eval_dl:
            self.data_batch, self.targets_batch = data, targets
            predictions = self.model.predict(self.data_batch)
            metric_data = []
            for metric in self.config['metrics']:
                metric_data.append(metric(predictions, self.targets_batch))
            return tuple(metric_data)

    def predict(self, X):
        return self.model.predict(X)

## Интерфейс для перебора параметров моделей

In [17]:
def timer(attr):
    """
        Декоратор используется для вывода времени,
        за которое выполняется метод класса
    """
    @functools.wraps(attr)
    def _wrapper(self, *args, **kwargs):
        start = time.perf_counter()
        result = attr(self, *args, **kwargs)
        runtime = time.perf_counter() - start
        print(f'{runtime:.3f}s')
        return result
    return _wrapper


class ModelRunner:
    """
        Класс предназначенный для удобного запуска моделей машинного обучения.

        Его возможности:
            - Создание экземпляров моделей с задаваемыми через словарь параметрами
              и их запуск через методы .fit() и .predict().
            - Вывод шкалы прогресса и времени выполнения методов моделей
            - Вывод различных метрик
            - Запуск одной модели с комбинацией различных параметров

        use case:
            >>> defaults = {'lr': 0.01, 'epochs': 100}
            >>> runner_inst = ModelRunner(ModelClass, timer=True, defaults=defaults, metrics=[accuracy])
            >>> runner_inst.run(training_data, eval_input, eval_answers, params={'lr': [0.001, 0.005], 'batch_size':[100],})

        inputs:
            model_class - Class of your model (not instance), all parameters should be passed through **kwargs
            defaults: dict - default kwargs for your model
            metrics: list - list of functions, they must take only two positional args: foo(preds, answers)
    """

    def __init__(self, model_class, defaults=None, metrics=None, responsive_bar=False) -> None:
        self.model_class = model_class
        self.metrics = metrics
        self._metric_data = []
        self._parameters_data = []
        if defaults is not None:
            self.defaults = defaults

        self._responsive_bar = responsive_bar

    def run(self, train, eval, params: dict, one_vs_one: bool = False):
        """
            Запустить проверку моделей с заданными данными и параметрами.

            Итерируемые параметры задаются в словаре params в виде:
                >>> params = {
                >>>     'lr': [1,2,3,4]
                >>>     'epochs': [100, 200]
                >>>     }
            Количество шагов проверки при этом зависит также от способа сочетания
            параметров:
                - При one_vs_one=True все доступные параметры сочетаются друг
                с другом, в данном примере получается 8 шагов

                - При one_vs_one=False параметры берутся по столбцам, при этом
                если в каком то списке не хватает значений, то берется его последнее
                значение в списке. В данном примере получается 4 шага

            inputs:
                train - training dataset, first column is answer labels
                eval_input - evaluation dataset without answers
                eval_answers - answer array in the same order as eval_input
                               size = (1, N)
                params - dict consisted of lists of the iterated parameters.
                        every value must be a list, even singular vals
                one_vs_one - parameters combination method, True is One vertus One;
                            False is columswise combination.

        """
        self._metric_data = []
        self._models = []
        curr_params = dict()
        if one_vs_one:
            # Проверка на наличие единственного значения в списке
            if len(list(params.values())) <= 1:
                pairs = list(*params.values())
            else:
                pairs = list(product(*list(params.values())))

            if self._responsive_bar:
                len_model_ticks = self.model_class(
                    self.defaults).define_tick(None, additive=len(eval))
            else:
                len_model_ticks = 1
            with alive_bar(len(list(pairs)*len_model_ticks), title=f'Проверка модели {self.model_class.__name__}', force_tty=True, bar='filling') as bar:
                # Распаковка параметров
                for vals in pairs:
                    for i, key in enumerate(params.keys()):
                        try:
                            curr_params[key] = vals[i]
                        except TypeError:
                            curr_params[key] = vals

                    print('-----With parameters-----')
                    for key, val in curr_params.items():
                        print(f'{key} = {val}')

                    self._parameters_data.append(list(curr_params.values()))
                    self._run_method(train, eval, curr_params, bar)
                    bar()  # продвижение полосы прогресса
        else:
            iter_lens = [len(val) for val in params.values()]
            if self._responsive_bar:
                len_model_ticks = self.model_class(
                    self.defaults).define_tick(None, additive=len(eval))
            else:
                len_model_ticks = 1
            max_len = max(iter_lens)
            with alive_bar(max_len*len_model_ticks, title=f'Проверка модели {self.model_class.__name__}', force_tty=True, bar='filling') as bar:
                for i in range(max_len):
                    for pos, key in enumerate(params.keys()):
                        this_len = iter_lens[pos]
                        try:
                            curr_params[key] = params[key][min(
                                this_len - 1, i)]
                        except TypeError:
                            curr_params[key] = params[key]

                    print('-----With parameters-----')
                    for key, val in curr_params.items():
                        print(f'{key} = {val}')

                    self._parameters_data.append(list(curr_params.values()))
                    self._run_method(train, eval, curr_params, bar)
                    bar()  # продвижение полосы прогресса

        print("===============RESULTS=================")
        pos = self._highest_metric_pos(self._metric_data)
        print(f'On iteration {pos}:')
        print(f"With hyperparameters: {self._parameters_data[pos]}")
        print(f'Got metrics: {self._metric_data[pos]}')

    def _run_method(self, train, eval, params: dict, bar_obj: Callable):
        """
            Внутренний обработчик ввода и вывода данных модели

            inputs:
                train - training dataset, first column is answer labels
                eval_input - evaluation dataset without answers
                eval_answers - answer array in the same order as eval_input
                               size = (1, N)
                params - dict of parameters that will be directly passed to the model


        """

        params_to_pass = self._mix_params(self.defaults, params)
        self.model = self.model_class(**params_to_pass)
    
        self.device = self.model.device

        eval_data, eval_targets = eval.data.to(self.device), eval.targets.to(self.device)
        if self._responsive_bar:
            self.model.define_tick(bar_obj, len(eval_targets))

        print('~fit complete in ', end='')
        self._run_train(train, eval)

        print('~eval complete in ', end='')
        answer = self._run_eval(eval_data)
        self._comma_metrics(answer, eval_targets)
        self._models.append(self.model)

    def _mix_params(self, main, invasive):
        """
            Внутренний метод для изменения словаря с параметрами

            Вносит изменения в основной словарь с параметрами 
            из другого словаря. Основной словарь при этом не меняется.

            inputs:
                main: dict - dict to be  inserted values into
                invasive: dict - mixed in values
            output - new dict with mixed values
        """
        maincpy = main.copy()
        for key, val in invasive.items():
            maincpy[key] = val
        return maincpy

    def _comma_metrics(self, preds, evals):
        """
            Внутренний метод для получения метрик модели

            Можно в последствии получить все метрики через
            метод ModelRunner.get_metrics()

            inputs:
                preds: np.ndarray - model's predictions
                evals: np.ndarray - true labels
        """
        if not isinstance(evals, torch.Tensor):
            evals = torch.Tensor(evals).to(self.device).int()
        buff = []
        for metric in self.metrics:
            res = metric(preds, evals)
            if isinstance(metric, FunctionType):
                print(f"    {metric.__name__} = {res:.3f}")
            else:
                print(f"    {metric.__class__.__name__} = {res:.3f}")
            buff.append(res)
        self._metric_data.append(buff)

    def _highest_metric_pos(self, metrics):
        """
            Внутренний метод для получения позиции
            наибольшего значения метрик.

            Если видов метрик больше 1, то сравнивается их
            среднее геометрическое.

            inputs: 
                metrics: list - list of metrics, list of lists or
                list of floats
            output - index of the biggest value
        """
        score = [math.prod(vals) for vals in metrics]
        return score.index(max(score))

    def get_models(self):
        """
            Получить список со всеми использованными моделями

            output - list of all calculated models
        """

        return self._models

    def get_metrics(self):
        """
            Получить список со всеми значениями метрик

            Если метрик больше одной, то выдается список
            списков. Далее самим можно понять где какая метрика, 
            это не сложно

            output - list of all calculated metrics
        """
        return self._metric_data

    def get_params(self):
        """
            Получить список со всеми использованными
            гиперпараметрами

            Совпадает с тем, что передавалось в конструктор
            класса и в метод ModelRunner.run()

            output - list of hyperparameters
        """
        return self._parameters_data

    @timer
    def _run_train(self, train, eval):
        """
            Внутренний метод для запуска процесса 
            тренировки модели.

            inputs:
                train - training data
            decorator prints the time it takes to run
            this method
        """
        self.model.fit(train, eval)

    @timer
    def _run_eval(self, eval_input):
        """
            Внутренний метод для получения ответов модели.

            inputs:
                eval_input - data to process
            output: np.ndarray - model's predictions

            decorator prints the time it takes to run
            this method
        """
        self.model.data_batch = eval_input
        return self.model.predict(self.model.data_batch)

## Задаем начальные параметры
Датасет - MNIST

In [20]:
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    trans = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])
    train_dataset = MNIST('data/', train=True, download=True, transform=trans)
    val_dataset = MNIST('data/', train=False, download=True, transform=trans)
    model_params = OrderedDict([
        ('batch1', nn.BatchNorm2d(1)),
        ('conv1', nn.Conv2d(1, 16, (2, 2), stride=1, padding=1)),
        ('relu1', nn.ReLU()),
        ('maxpool1', nn.MaxPool2d((2, 2))),

        ('batch2', nn.BatchNorm2d(16)),
        ('conv2', nn.Conv2d(16, 64, (2, 2), stride=1, padding=1)),
        ('relu2', nn.ReLU()),
        ('maxpool2', nn.MaxPool2d((2, 2))),

        ('flatten3', nn.Flatten()),
        ('batch3', nn.BatchNorm1d(64*7*7)),
        ('linear3', nn.Linear(64*7*7, 100)),
        ('relu3', nn.ReLU()),
        ('linear4', nn.Linear(100, 10)),
        ('relu4', nn.ReLU()),
    ])

    optim_params = {
        'name': 'Adam',
        'params': None,
        'lr': 1e-3,
    }
    trainer_hp = {
        'batch_size': 50,
        'model_class': SeqModeler,
        'model_params': {'ord_dict': model_params, 'device': device},
        'set_optimizer': optim_params,
        'device': device,
        'criterion': nn.CrossEntropyLoss(),
        'enable_print': False,
        'metrics': [Accuracy(num_classes=10, average='macro').to(device), Recall(num_classes=10, average='macro').to(device), Precision(num_classes=10, average='macro').to(device)]
    }

### Тестирование полносвязной сети
##### Скорость обучения

In [21]:
model_params = OrderedDict([
        ('flatten1',nn.Flatten()),
        ('linear1', nn.Linear(784, 400)),
        ('relu1', nn.ReLU()),
        ('linear2', nn.Linear(400, 100)),
        ('relu2', nn.ReLU()),
        ('linear3', nn.Linear(100, 10)),
        ('relu3', nn.ReLU()),
    ])
optim_params_1 = {
    'name': 'Adam',
    'params': None,
    'lr': 1e-2,
}
optim_params_2 = {
    'name': 'Adam',
    'params': None,
    'lr': 1e-3,
}

optim_params_3 = {
    'name': 'Adam',
    'params': None,
    'lr': 1e-4,
}

optim_params_4 = {
    'name': 'Adam',
    'params': None,
    'lr': 1e-5,
}

trainer_hp['model_params']['ord_dict'] = model_params



params = {
    'set_optimizer': [optim_params_1, optim_params_2, optim_params_3, optim_params_4]
}

model_runner = ModelRunner(Trainer, trainer_hp, metrics=trainer_hp['metrics'])
model_runner.run(train_dataset, val_dataset, params)
del model_runner

on 0: -----With parameters-----
on 0: set_optimizer = {'name': 'Adam', 'params': None, 'lr': 0.01}
on 0: ~fit complete in 231.856s
on 0: ~eval complete in 0.002s
on 0:     Accuracy = 0.395
on 0:     Recall = 0.395
on 0:     Precision = 0.348
on 1: -----With parameters-----
on 1: set_optimizer = {'name': 'Adam', 'params': None, 'lr': 0.001}
on 1: ~fit complete in 228.532s
on 1: ~eval complete in 0.002s
on 1:     Accuracy = 0.396
on 1:     Recall = 0.396
on 1:     Precision = 0.437
on 2: -----With parameters-----
on 2: set_optimizer = {'name': 'Adam', 'params': None, 'lr': 0.0001}
on 2: ~fit complete in 228.466s
on 2: ~eval complete in 0.003s
on 2:     Accuracy = 0.395
on 2:     Recall = 0.395
on 2:     Precision = 0.434
on 3: -----With parameters-----
on 3: set_optimizer = {'name': 'Adam', 'params': None, 'lr': 1e-05}
on 3: ~fit complete in
Проверка модели Trainer |██████████████████████████████⚠︎         | (!) 3/4 [75%] in 13:12.5 (0.00/s)                    


KeyboardInterrupt: ignored

### Количество слоев

In [None]:
model_params_1 = OrderedDict([
        ('flatten1',nn.Flatten()),
        ('linear1', nn.Linear(784, 10)),
        ('relu1', nn.ReLU()),
    ])
params1 = {'ord_dict': model_params_1, 'device': device}
model_params_2 = OrderedDict([
        ('flatten1',nn.Flatten()),
        ('linear1', nn.Linear(784, 200)),
        ('relu1', nn.ReLU()),
        ('linear2', nn.Linear(200, 10)),
        ('relu2', nn.ReLU()),
    ])
    
params2 = {'ord_dict': model_params_2, 'device': device}
model_params_3 = OrderedDict([
        ('flatten1',nn.Flatten()),
        ('linear1', nn.Linear(784, 400)),
        ('relu1', nn.ReLU()),
        ('linear2', nn.Linear(400, 100)),
        ('relu2', nn.ReLU()),
        ('linear3', nn.Linear(100, 10)),
        ('relu3', nn.ReLU()),
    ])

params3 = {'ord_dict': model_params_3, 'device': device}


params = {
    'model_params': [params1, params2, params3]
}

model_runner = ModelRunner(Trainer, trainer_hp, metrics=trainer_hp['metrics'])
model_runner.run(train_dataset, val_dataset, params)
del model_runner

### Количество нейронов в слоях

In [None]:
model_params_1 = OrderedDict([
        ('flatten1',nn.Flatten()),
        ('linear1', nn.Linear(784, 100)),
        ('relu1', nn.ReLU()),
        ('linear2', nn.Linear(100, 20)),
        ('relu2', nn.ReLU()),
        ('linear3', nn.Linear(20, 10)),
        ('relu3', nn.ReLU()),
    ]))
params1 = {'ord_dict': model_params_1, 'device': device}
model_params_2 = OrderedDict([
        ('flatten1',nn.Flatten()),
        ('linear1', nn.Linear(784, 400)),
        ('relu1', nn.ReLU()),
        ('linear2', nn.Linear(400, 100)),
        ('relu2', nn.ReLU()),
        ('linear3', nn.Linear(100, 10)),
        ('relu3', nn.ReLU()),
    ])
    
params2 = {'ord_dict': model_params_2, 'device': device}
model_params_3 = OrderedDict([
        ('flatten1',nn.Flatten()),
        ('linear1', nn.Linear(784, 600)),
        ('relu1', nn.ReLU()),
        ('linear2', nn.Linear(600, 300)),
        ('relu2', nn.ReLU()),
        ('linear3', nn.Linear(300, 10)),
        ('relu3', nn.ReLU()),
    ])

params3 = {'ord_dict': model_params_3, 'device': device}

params = {
    'model_params': [params1, params2, params3]
}

model_runner = ModelRunner(Trainer, trainer_hp, metrics=trainer_hp['metrics'])
model_runner.run(train_dataset, val_dataset, params)
del model_runner

### Функции активации

In [None]:
model_params_1 = OrderedDict([
        ('flatten1',nn.Flatten()),
        ('linear1', nn.Linear(784, 400)),
        ('softplus1', nn.Softplus()),
        ('linear2', nn.Linear(400, 100)),
        ('softplus2', nn.Softplus(),
        ('linear3', nn.Linear(100, 10)),
        ('softplus3',nn.Softplus(), 
    ]))
params1 = {'ord_dict': model_params_1, 'device': device}
model_params_2 = OrderedDict([
        ('flatten1',nn.Flatten()),
        ('linear1', nn.Linear(784, 400)),
        ('sigmoid1', nn.Sigmoid()),
        ('linear2', nn.Linear(400, 100)),
        ('sigmoid2', nn.Sigmoid()),
        ('linear3', nn.Linear(100, 10)),
        ('sigmoid3', nn.Sigmoid()),
    ])
    
params2 = {'ord_dict': model_params_2, 'device': device}
model_params_3 = OrderedDict([
        ('flatten1',nn.Flatten()),
        ('linear1', nn.Linear(784, 400)),
        ('tanh1', nn.Tanh()),
        ('linear2', nn.Linear(400, 100)),
        ('tanh2', nn.Tanh()),
        ('linear3', nn.Linear(100, 10)),
        ('tanh3', nn.Tanh()),
    ])

params3 = {'ord_dict': model_params_3, 'device': device}

params = {
    'model_params': [params1, params2, params3]
}

model_runner = ModelRunner(Trainer, trainer_hp, metrics=trainer_hp['metrics'])
model_runner.run(train_dataset, val_dataset, params)
del model_runner

## Сверточная нейронная сеть

### Скорость обучения

In [None]:
model_params = OrderedDict([
    ('conv1', nn.Conv2d(1, 16, (2, 2), stride=1, padding=1)),
    ('relu1', nn.ReLU()),
    ('maxpool1', nn.MaxPool2d((2, 2))),

    ('conv2', nn.Conv2d(16, 64, (2, 2), stride=1, padding=1)),
    ('relu2', nn.ReLU()),
    ('maxpool2', nn.MaxPool2d((2, 2))),

    ('flatten3', nn.Flatten()),
    ('linear3', nn.Linear(64*7*7, 100)),
    ('relu3', nn.ReLU()),
    ('linear4', nn.Linear(100, 10)),
    ('relu4', nn.ReLU()),
    ])

optim_params_1 = {
    'name': 'Adam',
    'params': None,
    'lr': 1e-2,
}
optim_params_2 = {
    'name': 'Adam',
    'params': None,
    'lr': 1e-3,
}

optim_params_3 = {
    'name': 'Adam',
    'params': None,
    'lr': 1e-4,
}

optim_params_4 = {
    'name': 'Adam',
    'params': None,
    'lr': 1e-5,
}

trainer_hp['model_params']['ord_dict'] = model_params


params = {
    'set_optimizer': [optim_params_1, optim_params_2, optim_params_3, optim_params_4]
}

model_runner = ModelRunner(Trainer, trainer_hp, metrics=trainer_hp['metrics'])
model_runner.run(train_dataset, val_dataset, params)
del model_runner

## Количество слоев

In [None]:
model_params_1 = OrderedDict([
    ('conv1', nn.Conv2d(1, 16, (2, 2), stride=1, padding=1)),
    ('relu1', nn.ReLU()),
    ('maxpool1', nn.MaxPool2d((2, 2))),

    ('flatten3', nn.Flatten()),
    ('linear3', nn.Linear(16*14*14, 100)),
    ('relu3', nn.ReLU()),
    ('linear4', nn.Linear(100, 10)),
    ('relu4', nn.ReLU()),
    ])
    
params1 = {'ord_dict': model_params_1, 'device': device}

model_params_2 = OrderedDict([
    ('conv1', nn.Conv2d(1, 16, (2, 2), stride=1, padding=1)),
    ('relu1', nn.ReLU()),
    ('maxpool1', nn.MaxPool2d((2, 2))),

    ('conv2', nn.Conv2d(16, 64, (2, 2), stride=1, padding=1)),
    ('relu2', nn.ReLU()),
    ('maxpool2', nn.MaxPool2d((2, 2))),

    ('flatten3', nn.Flatten()),
    ('linear3', nn.Linear(64*7*7, 100)),
    ('relu3', nn.ReLU()),
    ('linear4', nn.Linear(100, 10)),
    ('relu4', nn.ReLU()),
    ])
params2 = {'ord_dict': model_params_2, 'device': device}
    
model_params_3 = OrderedDict([
    ('conv1', nn.Conv2d(1, 16, (2, 2), stride=1, padding=1)),
    ('relu1', nn.ReLU()),
    ('maxpool1', nn.MaxPool2d((2, 2))),

    ('conv2', nn.Conv2d(16, 32, (2, 2), stride=1, padding=1)),
    ('relu2', nn.ReLU()),
    ('maxpool2', nn.MaxPool2d((2, 2))),

    ('conv3', nn.Conv2d(32, 64, (2, 2), stride=1, padding=1)),
    ('relu3', nn.ReLU()),
    ('maxpool3', nn.MaxPool2d((2, 2))),

    ('flatten3', nn.Flatten()),
    ('linear3', nn.Linear(64*3*3, 100)),
    ('relu3', nn.ReLU()),
    ('linear4', nn.Linear(100, 10)),
    ('relu4', nn.ReLU()),
    ])
params3 = {'ord_dict': model_params_3, 'device': device}

params = {
    'model_params': [params1, params2, params3],
}

model_runner = ModelRunner(Trainer, trainer_hp, metrics=trainer_hp['metrics'])
model_runner.run(train_dataset, val_dataset, params)
del model_runner

### Количество нейронов

In [None]:
model_params_1 = OrderedDict([
    ('conv1', nn.Conv2d(1, 8, (2, 2), stride=1, padding=1)),
    ('relu1', nn.ReLU()),
    ('maxpool1', nn.MaxPool2d((2, 2))),

    ('conv2', nn.Conv2d(8, 16, (2, 2), stride=1, padding=1)),
    ('relu2', nn.ReLU()),
    ('maxpool2', nn.MaxPool2d((2, 2))),

    ('flatten3', nn.Flatten()),
    ('linear3', nn.Linear(16*7*7, 100)),
    ('relu3', nn.ReLU()),
    ('linear4', nn.Linear(100, 10)),
    ('relu4', nn.ReLU()),
    ])
params1 = {'ord_dict': model_params_1, 'device': device}

model_params_2 = OrderedDict([
    ('conv1', nn.Conv2d(1, 16, (2, 2), stride=1, padding=1)),
    ('relu1', nn.ReLU()),
    ('maxpool1', nn.MaxPool2d((2, 2))),

    ('conv2', nn.Conv2d(16, 64, (2, 2), stride=1, padding=1)),
    ('relu2', nn.ReLU()),
    ('maxpool2', nn.MaxPool2d((2, 2))),

    ('flatten3', nn.Flatten()),
    ('linear3', nn.Linear(64*7*7, 100)),
    ('relu3', nn.ReLU()),
    ('linear4', nn.Linear(100, 10)),
    ('relu4', nn.ReLU()),
    ])
params2 = {'ord_dict': model_params_2, 'device': device}

model_params_3 = OrderedDict([
    ('conv1', nn.Conv2d(1, 64, (2, 2), stride=1, padding=1)),
    ('relu1', nn.ReLU()),
    ('maxpool1', nn.MaxPool2d((2, 2))),

    ('conv2', nn.Conv2d(64, 128, (2, 2), stride=1, padding=1)),
    ('relu2', nn.ReLU()),
    ('maxpool2', nn.MaxPool2d((2, 2))),

    ('flatten3', nn.Flatten()),
    ('linear3', nn.Linear(128*7*7, 100)),
    ('relu3', nn.ReLU()),
    ('linear4', nn.Linear(100, 10)),
    ('relu4', nn.ReLU()),
    ])
params3 = {'ord_dict': model_params_3, 'device': device}

params = {
    'model_params': [params1, params2, params3],
}

model_runner = ModelRunner(Trainer, trainer_hp, metrics=trainer_hp['metrics'])
model_runner.run(train_dataset, val_dataset, params)
del model_runner

### Функции активации
Вместо софтплюс возьму LeakyRELU

In [None]:
model_params_1 = OrderedDict([
    ('conv1', nn.Conv2d(1, 16, (2, 2), stride=1, padding=1)),
    ('relu1', nn.LeakyReLU()),
    ('maxpool1', nn.MaxPool2d((2, 2))),

    ('conv2', nn.Conv2d(16, 64, (2, 2), stride=1, padding=1)),
    ('relu2', nn.LeakyReLU()),
    ('maxpool2', nn.MaxPool2d((2, 2))),

    ('flatten3', nn.Flatten()),
    ('linear3', nn.Linear(64*7*7, 100)),
    ('relu3', nn.LeakyReLU()),
    ('linear4', nn.Linear(100, 10)),
    ('relu4', nn.LeakyReLU()),
    ])
params1 = {'ord_dict': model_params_1, 'device': device}

model_params_2 = OrderedDict([
    ('conv1', nn.Conv2d(1, 16, (2, 2), stride=1, padding=1)),
    ('sigmoid1', nn.Sigmoid()),
    ('maxpool1', nn.MaxPool2d((2, 2))),

    ('conv2', nn.Conv2d(16, 64, (2, 2), stride=1, padding=1)),
    ('sigmoid2', nn.Sigmoid()),
    ('maxpool2', nn.MaxPool2d((2, 2))),

    ('flatten3', nn.Flatten()),
    ('linear3', nn.Linear(64*7*7, 100)),
    ('sigmoid3', nn.Sigmoid()),
    ('linear4', nn.Linear(100, 10)),
    ('sigmoid4', nn.Sigmoid()),
    ])
params2 = {'ord_dict': model_params_2, 'device': device}

model_params_3 = OrderedDict([
    ('conv1', nn.Conv2d(1, 16, (2, 2), stride=1, padding=1)),
    ('tanh1', nn.Tanh()),
    ('maxpool1', nn.MaxPool2d((2, 2))),

    ('conv2', nn.Conv2d(16, 64, (2, 2), stride=1, padding=1)),
    ('tanh2', nn.Tanh()),
    ('maxpool2', nn.MaxPool2d((2, 2))),

    ('flatten3', nn.Flatten()),
    ('linear3', nn.Linear(64*7*7, 100)),
    ('tanh3', nn.Tanh()),
    ('linear4', nn.Linear(100, 10)),
    ('tanh4', nn.tanh4()),
    ])
params3 = {'ord_dict': model_params_3, 'device': device}

params = {
    'model_params': [params1, params2, params3],
}

model_runner = ModelRunner(Trainer, trainer_hp, metrics=trainer_hp['metrics'])
model_runner.run(train_dataset, val_dataset, params)
del model_runner

### Изменение фильтра