# Imports

In [1]:
from pathlib import Path
import torch
from torch import nn
from torch import optim
from mltrainer import metrics, Trainer, TrainerSettings, ReportTypes
from mltrainer.preprocessors import BasePreprocessor
import mlflow
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope
from mads_datasets import DatasetFactoryProvider, DatasetType
from loguru import logger
from datetime import datetime

# Datastreamer

In [2]:
# Streaming configuration: batch size and the preprocessor applied to each batch.
batchsize = 64
preprocessor = BasePreprocessor()

# Build the factory for the FASHION dataset and let it create the data streamers.
datafactory = DatasetFactoryProvider.create_factory(DatasetType.FASHION)
streamers = datafactory.create_datastreamer(
    batchsize=batchsize,
    preprocessor=preprocessor,
)

# Keep the streamer objects around as well as the streams obtained from them.
train, valid = streamers["train"], streamers["valid"]
train_streamer, valid_streamer = train.stream(), valid.stream()

[32m2023-12-16 15:00:00.585[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at /home/azureuser/.cache/mads_datasets/fashionmnist[0m
[32m2023-12-16 15:00:00.587[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m124[0m - [1mFile already exists at /home/azureuser/.cache/mads_datasets/fashionmnist/fashionmnist.pt[0m


# Model

In [4]:
# This notebook is expected to run on a VM without GPU support, but the
# check below lets it use a CUDA device when it is run somewhere else.
#
# PyTorch's device type for NVIDIA GPUs is "cuda"; "gpu" is not a valid
# device string and would make `.to(device)` raise on a GPU machine.

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Model training takes place on the {device}")

# Model definition
class CNN(nn.Module):
    """Convolutional classifier: three conv stages, average pooling, dense head.

    Each convolutional stage is Conv2d(kernel 3) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2). The remaining spatial activation map is averaged away so the
    dense head always receives ``filters`` features per sample. The head ends
    in a 10-way linear output (raw logits, no softmax).

    Args:
        filters: Number of output channels of every convolution.
        units1: Width of the first dense layer.
        units2: Width of the second dense layer.
        dropout: Drop probability applied before the output layer.
        input_size: Shape ``(batch, channels, height, width)`` of a
            representative input. It is only used at construction time to
            size the average pooling layer; channels must be 1.
    """

    def __init__(self, filters: int, units1: int, units2: int, dropout: float, input_size: tuple=(32, 1, 28, 28)):
        super().__init__()

        self.convolutional_layers = nn.Sequential(
            nn.Conv2d(1, filters, kernel_size=3),
            nn.BatchNorm2d(filters),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(filters, filters, kernel_size=3),
            nn.BatchNorm2d(filters),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(filters, filters, kernel_size=3),
            nn.BatchNorm2d(filters),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )

        # To connect the image layers to the linear layers, an AvgPool2d is
        # sized to the activation map produced by the convolutional layers.
        # This results in (batch, filters, 1, 1) before flattening.
        activation_map_size = self.conv_test(input_size)
        self.aggregated_layer = nn.AvgPool2d(tuple(activation_map_size))

        self.dense_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(filters, units1),
            nn.BatchNorm1d(units1),
            nn.ReLU(),
            nn.Linear(units1, units2),
            nn.BatchNorm1d(units2),
            nn.ReLU(),
            # Element-wise dropout. Dropout1d expects (N, C, L) or (C, L), so a
            # 2-D (batch, units2) tensor would be read as unbatched and whole
            # samples would be zeroed instead of individual activations.
            nn.Dropout(dropout),
            nn.Linear(units2, 10)
        )

    def conv_test(self, input_size):
        """Return the (height, width) of the conv stack output for ``input_size``.

        A dummy all-ones tensor is pushed through the convolutional layers.
        The probe runs in eval mode and without autograd so that it does not
        update the BatchNorm running statistics or build a graph; the previous
        training mode is restored afterwards.

        Args:
            input_size: Shape ``(batch, channels, height, width)`` to probe.

        Returns:
            The last two dimensions of the output shape (a ``torch.Size``).
        """
        # Create the probe where the weights live so this also works after .to().
        reference = next(self.convolutional_layers.parameters(), None)
        if reference is None:
            probe = torch.ones(input_size)
        else:
            probe = torch.ones(input_size, device=reference.device, dtype=reference.dtype)

        was_training = self.convolutional_layers.training
        self.convolutional_layers.eval()
        try:
            with torch.no_grad():
                conv_layers_output = self.convolutional_layers(probe)
        finally:
            self.convolutional_layers.train(was_training)
        return conv_layers_output.shape[-2:]

    def forward(self, input):
        """Compute class logits of shape (batch, 10) for a batch of images."""
        conv_layers_output = self.convolutional_layers(input)
        agg_layer_output = self.aggregated_layer(conv_layers_output)
        dense_layers_output = self.dense_layers(agg_layer_output)
        return dense_layers_output

# Instantiate the network with the baseline hyperparameters, then move it
# to the selected device (nn.Module.to returns the module itself).
model = CNN(
    filters=32,
    units1=64,
    units2=32,
    dropout=0.2,
)
model = model.to(device)

Model training takes place on the cpu


# Optimizer, loss & accuracy

In [5]:
# Training components. Note that the optimizer and the accuracy metric are
# stored as classes (not instances); only the loss is instantiated here.
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam
accuracy = metrics.Accuracy

# MLflow

In [6]:
# Track runs in a local SQLite database and select the experiment by name.
# The set_experiment call stays last so the cell displays the Experiment.
experiment_path = "hyper_parameter_exercise"
mlflow.set_tracking_uri(uri="sqlite:///mlflow.db")
mlflow.set_experiment(experiment_name=experiment_path)

2023/12/16 15:00:37 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2023/12/16 15:00:37 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Running upgrade  -> 451aebb31d03, add metric step
INFO  [alembic.runtime.migration] Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
INFO  [alembic.runtime.migration] Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
INFO  [alembic.runtime.migration] Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
INFO  [alembic.runtime.migration] Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
INFO  [alembic.runtime.migration] Running upgrade 7ac759974ad8 -> 89d4b8295536, create latest metrics table
INFO  [89d4b8295536_create_latest_metrics_table_py] Migration complete!
INFO  

<Experiment: artifact_location='/home/azureuser/code/ADS-DeepLearning-project/hyper_parameter_exercise/notebook/mlruns/1', creation_time=1702738838363, experiment_id='1', last_update_time=1702738838363, lifecycle_stage='active', name='hyper_parameter_exercise', tags={}>