# Imports

In [6]:
from pathlib import Path
import torch
from torch import nn
from torch import optim
from torchsummary import summary
from mltrainer import metrics, Trainer, TrainerSettings, ReportTypes
from mltrainer.preprocessors import BasePreprocessor
import mlflow
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope
from mads_datasets import DatasetFactoryProvider, DatasetType
from loguru import logger
from datetime import datetime

# Model

In [12]:
# Although this program will most likely only run on the VM, which has no
# GPU support, a GPU check is included in case it is run elsewhere.
# NOTE: PyTorch names the NVIDIA GPU backend "cuda"; "gpu" is not a valid
# device string and would make `.to(device)` raise on a GPU machine.

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Model training takes place on the {device}")

# Model definition
class CNN(nn.Module):
    """Small convolutional image classifier with 10 output logits.

    Three Conv2d -> ReLU -> MaxPool2d stages are followed by an average
    pool that collapses each activation map to a single value, and a
    three-layer dense head.

    Args:
        filters: Number of output channels of every convolutional layer.
        units1: Width of the first hidden dense layer.
        units2: Width of the second hidden dense layer.
        input_size: Shape of the expected input, laid out as
            (batch, channels, height, width). The channel count and the
            spatial size are used to configure the first convolution and
            the average pool.
    """

    def __init__(self, filters: int, units1: int, units2: int, input_size: tuple=(32, 1, 28, 28)):
        super().__init__()

        # Take the channel count from the declared input shape instead of
        # hard-coding 1, so non-grayscale inputs work too. Indexing from the
        # end also supports an unbatched (channels, height, width) shape.
        in_channels = input_size[-3]

        self.convolutional_layers = nn.Sequential(
            nn.Conv2d(in_channels, filters, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(filters, filters, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(filters, filters, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

        # To connect the convolutional layers to the linear layers, an
        # AvgPool2d is sized to the full activation map produced by the
        # convolutional layers, giving (batch, filters, 1, 1) before
        # flattening for inputs of the declared spatial size.
        activation_map_size = self.conv_test(input_size)
        self.aggregated_layer = nn.AvgPool2d(activation_map_size)

        self.dense_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(filters, units1),
            nn.ReLU(),
            nn.Linear(units1, units2),
            nn.ReLU(),
            nn.Linear(units2, 10),
        )

    def conv_test(self, input_size):
        """Return the (height, width) of the convolutional output.

        A dummy tensor of ones with shape ``input_size`` is pushed through
        the convolutional layers and the last two dimensions of the result
        are returned.
        """
        # Only the output shape is needed, so skip building an autograd graph.
        with torch.no_grad():
            dummy_input = torch.ones(input_size)
            conv_layers_output = self.convolutional_layers(dummy_input)
        return conv_layers_output.shape[-2:]

    def forward(self, input):
        """Compute the class logits for a batch of images.

        The parameter name ``input`` is kept (although it shadows the
        builtin) so that keyword callers keep working.
        """
        conv_layers_output = self.convolutional_layers(input)
        agg_layer_output = self.aggregated_layer(conv_layers_output)
        dense_layers_output = self.dense_layers(agg_layer_output)
        return dense_layers_output

# Build the network with fixed hyperparameters, then move its parameters to
# the selected device (Module.to returns the module itself).
model = CNN(filters=32, units1=64, units2=32)
model = model.to(device)

Model training takes place on the cpu
