# Imports

In [1]:
from pathlib import Path
import torch
from torch import nn
from torch import optim
from mltrainer import metrics, Trainer, TrainerSettings, ReportTypes
from mltrainer.preprocessors import BasePreprocessor
import mlflow
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope
from mads_datasets import DatasetFactoryProvider, DatasetType
from loguru import logger
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt

# Datastreamer

In [2]:
datafactory = DatasetFactoryProvider.create_factory(DatasetType.FASHION)
batchsize = 64
preprocessor = BasePreprocessor()
streamers = datafactory.create_datastreamer(batchsize=batchsize, preprocessor=preprocessor)
train = streamers["train"]
valid = streamers["valid"]
train_streamer = train.stream()
valid_streamer = valid.stream()

[32m2023-12-17 08:36:13.187[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at /home/azureuser/.cache/mads_datasets/fashionmnist[0m
[32m2023-12-17 08:36:13.188[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m124[0m - [1mFile already exists at /home/azureuser/.cache/mads_datasets/fashionmnist/fashionmnist.pt[0m


# Model

In [27]:
# Although this program will likely only run on the vm,
# and the vm lacks gpu support, a gpu check is added in case the
# program is ran outside of the vm.

device = "gpu" if torch.cuda.is_available() else "cpu"
print(f"Model training takes place on the {device}")


class CNN(nn.Module):
    def __init__(
        self,
        layers: int,
        filters: int,
        units: int,
        normalization: bool = False,
        dropout: float = 0.0,
        input_size: tuple = (32, 1, 28, 28),
    ):
        super().__init__()

        self.convolutional_blocks = nn.ModuleList()
        for i in range(1, layers):
            first = i == 1
            self.convolutional_blocks.append(
                self._generate_convolutional_layer(filters=filters, normalization=normalization, first=first)
            )

        # In order to solve the potential problem of connecting the image layers
        # with the linear layers, an AvgPool2d is added based on the size of the
        # activationmap from the convolutional layers.
        # Resulting in (batch, activationmaps, 1, 1) before flattening.
        activaton_map_size = self._conv_test(input_size)
        self.aggregated_layer = nn.AvgPool2d(activaton_map_size)

        self.linear_blocks = nn.ModuleList()
        for i in range(1, layers):
            first = i == 1
            last = i == (layers - 1)
            self.linear_blocks.append(
                self._generate_linear_layers(
                    filters=filters,
                    units=units,
                    dropout=dropout,
                    normalization=normalization,
                    first=first,
                    last=last,
                )
            )

    def _generate_convolutional_layer(
        self,
        filters: int, normalization: bool = False, first: bool = False
    ) -> nn.Sequential:
        in_channels = 1 if first else filters
        layers = []
        layers.append(nn.Conv2d(in_channels, filters, kernel_size=3))
        layers.append(nn.BatchNorm2d(filters)) if normalization else None
        layers.append(nn.ReLU())
        layers.append(nn.MaxPool2d(kernel_size=2))
        return nn.Sequential(*layers)

    def _conv_test(self, input_size):
        x = torch.ones(input_size)
        for block in self.convolutional_blocks:
            x = block(x)
        return x.shape[-2:]

    def _generate_linear_layers(
        self,
        filters: int,
        units: int,
        dropout: float = 0.0,
        normalization: bool = False,
        first: bool = False,
        last: bool = False,
    ) -> nn.Sequential:
        in_features = filters if first else units
        out_features = 10 if last else units
        layers = []
        layers.append(nn.Flatten()) if first else None
        layers.append(nn.Dropout1d(dropout)) if dropout != 0.0 and last else None
        layers.append(nn.Linear(in_features, out_features))
        layers.append(nn.BatchNorm1d(units)) if normalization and not last else None
        layers.append(nn.ReLU()) if not last else None
        return nn.Sequential(*layers)

    def forward(self, x):
        for block in self.convolutional_blocks:
            x = block(x)
        x = self.aggregated_layer(x)
        for block in self.linear_blocks:
            x = block(x)
        return x


model = CNN(layers=2, filters=32, units=32, dropout=0.2).to(device)


Model training takes place on the cpu


# Optimizer, loss & accuracy

In [17]:
optimizer = optim.Adam
loss_function = torch.nn.CrossEntropyLoss()
accuracy = metrics.Accuracy()

# MLflow

In [19]:
experiment_path = "MNIST_CNN"
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment(experiment_path)

2023/12/17 11:22:45 INFO mlflow.tracking.fluent: Experiment with name 'MNIST_CNN' does not exist. Creating a new experiment.


<Experiment: artifact_location='/home/azureuser/code/ADS-DeepLearning-project/hyper_parameter_exercise/notebook/mlruns/3', creation_time=1702812165353, experiment_id='3', last_update_time=1702812165353, lifecycle_stage='active', name='MNIST_CNN', tags={}>

In [20]:
model_directory = Path("../../models/mnist").resolve()
if not model_directory.exists():
    model_directory.mkdir(parents=True)
    print(f"Created {model_directory}")

In [24]:
trainer_settings = TrainerSettings(
    epochs=3,
    metrics=[accuracy],
    logdir="modellog",
    train_steps=100,
    valid_steps=100,
    reporttypes=[ReportTypes.MLFLOW]
)

def objective(params):
    with mlflow.start_run():
        mlflow.set_tag("model", "convolutional_network")
        mlflow.set_tag("dev", "tom")
        mlflow.log_params(params)
        mlflow.log_param("batchsize", f"{batchsize}")

        model = CNN(**params)
        trainer = Trainer(
            model=model,
            settings=trainer_settings,
            loss_fn=loss_function,
            optimizer=optimizer,
            traindataloader=train_streamer,
            validdataloader=valid_streamer,
            scheduler=optim.lr_scheduler.ReduceLROnPlateau
        )
        trainer.loop()

        tag = datetime.now().strftime("%Y%m%d-%H%M")
        model_path = model_directory / (tag + "model.pt")
        torch.save(model, model_path)

        mlflow.log_artifact(local_path=model_path, artifact_path="pytorch_models")
        
        return {"loss": trainer.test_loss, "status": STATUS_OK}

In [32]:
search_space = {
    "layers": scope.int(hp.quniform("layers", 2, 5, 1)),
    "units": scope.int(hp.quniform("units", 32, 128, 8)),
    "filters": 32,
}

In [33]:
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=100,
    trials=Trials()
)

  0%|          | 0/3 [00:00<?, ?trial/s, best loss=?]

[32m2023-12-17 11:29:54.267[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to modellog/20231217-112954[0m
[32m2023-12-17 11:29:55.618[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m65[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  1%|[38;2;30;71;6m1         [0m| 1/100 [00:00<00:34,  2.83it/s][A
  5%|[38;2;30;71;6m5         [0m| 5/100 [00:00<00:07, 13.35it/s][A
 10%|[38;2;30;71;6m#         [0m| 10/100 [00:00<00:03, 22.67it/s][A
 14%|[38;2;30;71;6m#4        [0m| 14/100 [00:00<00:03, 27.37it/s][A
 18%|[38;2;30;71;6m#8        [0m| 18/100 [00:00<00:02, 30.42it/s][A
 23%|[38;2;30;71;6m##3       [0m| 23/100 [00:00<00:02, 35.89it/s][A
 28%|[38;2;30;71;6m##8       [0m| 28/100 [00:00<00:01, 39.67it/s][A
 34%|[38;2;30;71;6m

 33%|███▎      | 1/3 [00:13<00:26, 13.11s/trial, best loss: 2.1145686292648316]

[32m2023-12-17 11:30:07.181[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to modellog/20231217-113007[0m
[32m2023-12-17 11:30:07.185[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m65[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  2%|[38;2;30;71;6m2         [0m| 2/100 [00:00<00:04, 19.88it/s][A
  5%|[38;2;30;71;6m5         [0m| 5/100 [00:00<00:03, 25.43it/s][A
  8%|[38;2;30;71;6m8         [0m| 8/100 [00:00<00:03, 25.94it/s][A
 11%|[38;2;30;71;6m#1        [0m| 11/100 [00:00<00:03, 27.02it/s][A
 15%|[38;2;30;71;6m#5        [0m| 15/100 [00:00<00:02, 30.59it/s][A
 19%|[38;2;30;71;6m#9        [0m| 19/100 [00:00<00:02, 32.14it/s][A
 23%|[38;2;30;71;6m##3       [0m| 23/100 [00:00<00:02, 31.35it/s][A
 27%|[38;2;30;71;6m#

 67%|██████▋   | 2/3 [00:26<00:13, 13.47s/trial, best loss: 1.2139101600646973]

[32m2023-12-17 11:30:20.864[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to modellog/20231217-113020[0m
[32m2023-12-17 11:30:20.866[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36m__init__[0m:[36m65[0m - [1mFound earlystop_kwargs in settings.Set to None if you dont want earlystopping.[0m
  0%|[38;2;30;71;6m          [0m| 0/3 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/100 [00:00<?, ?it/s][A
  5%|[38;2;30;71;6m5         [0m| 5/100 [00:00<00:01, 47.76it/s][A
 10%|[38;2;30;71;6m#         [0m| 10/100 [00:00<00:01, 48.14it/s][A
 15%|[38;2;30;71;6m#5        [0m| 15/100 [00:00<00:01, 47.61it/s][A
 20%|[38;2;30;71;6m##        [0m| 20/100 [00:00<00:01, 46.97it/s][A
 25%|[38;2;30;71;6m##5       [0m| 25/100 [00:00<00:01, 42.24it/s][A
 30%|[38;2;30;71;6m###       [0m| 30/100 [00:00<00:01, 43.71it/s][A
 36%|[38;2;30;71;6m###6      [0m| 36/100 [00:00<00:01, 46.05it/s][A
 41%|[38;2;30;71;6

100%|██████████| 3/3 [00:37<00:00, 12.44s/trial, best loss: 1.2139101600646973]


In [34]:
best_result

{'layers': 3.0, 'units': 88.0}

In [1]:
def read_mlflow_data(experiment_name):
    runs = mlflow.search_runs(experiment_ids=[experiment_name])
    return runs

mlflow_data = read_mlflow_data('2')

def create_heatmap_data(mlflow_data):
    heatmap_data = mlflow_data[['params.filters', 'params.units1', 'metrics.Loss/test']]
    return heatmap_data

def plot_heatmap(heatmap_data):
    heatmap_pivot = heatmap_data.pivot(index='params.filters', columns='params.units1', values='metrics.Loss/test')
    sns.heatmap(heatmap_pivot, annot=True, fmt=".4f", cmap="YlGnBu")
    plt.xlabel('Units')
    plt.ylabel('Dropout')
    plt.title('Test loss as a function of Units vs Dropout')
    plt.show()

heatmap_data = create_heatmap_data(mlflow_data)
plot_heatmap(heatmap_data)

NameError: name 'mlflow' is not defined