In [1]:
from pathlib import Path
import gin
import numpy as np
import torch
from typing import List
from torch.nn.utils.rnn import pad_sequence
from mltrainer import rnn_models, Trainer
from torch import optim

from mads_datasets import datatools

In [2]:
from mads_datasets import DatasetFactoryProvider, DatasetType
from mltrainer.preprocessors import PaddedPreprocessor
batch_size = 32
# Preprocessor handed to the datastreamers
# (presumably pads sequences to a common length; confirm in mltrainer.preprocessors).
preprocessor = PaddedPreprocessor()

# Factory for the gestures dataset.
gesturesdatasetfactory = DatasetFactoryProvider.create_factory(DatasetType.GESTURES)

# The factory returns a mapping of streamers keyed by split name.
streamers = gesturesdatasetfactory.create_datastreamer(
    batchsize=batch_size,
    preprocessor=preprocessor,
)
train, valid = streamers["train"], streamers["valid"]

[32m2024-11-25 18:16:13.955[0m | [1mINFO    [0m | [36mmads_datasets.base[0m:[36mdownload_data[0m:[36m121[0m - [1mFolder already exists at /home/asanchezsa/.cache/mads_datasets/gestures[0m
100%|[38;2;30;71;6m██████████[0m| 2600/2600 [00:00<00:00, 4392.60it/s]
100%|[38;2;30;71;6m██████████[0m| 651/651 [00:00<00:00, 4182.79it/s]


In [3]:
# Batch iterators for the two splits; these same objects are later given to the Trainer.
trainstreamer, validstreamer = train.stream(), valid.stream()

# Pull a single training batch to inspect the input shape and the labels.
x, y = next(iter(trainstreamer))
(x.shape, y)

(torch.Size([32, 30, 3]),
 tensor([ 5,  4,  5,  6,  4, 14, 15,  7, 12,  1, 14, 12, 16,  4,  2,  9,  1, 16,
          2, 17, 14,  9,  4,  3,  8, 19,  3, 16,  6, 13, 13,  7]))

In [4]:
from mltrainer import TrainerSettings, ReportTypes
from mltrainer.metrics import Accuracy

accuracy = Accuracy()

# Training configuration; the `objective` function reuses this object for every trial.
settings = TrainerSettings(
    # Reporting: log directory, report backends and tracked metrics.
    logdir=Path("gestures"),
    reporttypes=[
        ReportTypes.GIN,
        ReportTypes.TENSORBOARD,
        ReportTypes.MLFLOW,
    ],
    metrics=[accuracy],
    # Schedule: step counts per epoch are taken from the streamer lengths.
    epochs=10,
    train_steps=len(train),
    valid_steps=len(valid),
    # Keyword arguments for the learning-rate scheduler; no early-stopping arguments are given.
    scheduler_kwargs={"factor": 0.2, "patience": 5},
    earlystop_kwargs=None,
)
settings

epochs: 10
metrics: [Accuracy]
logdir: gestures
train_steps: 81
valid_steps: 20
reporttypes: [<ReportTypes.GIN: 1>, <ReportTypes.TENSORBOARD: 2>, <ReportTypes.MLFLOW: 3>]
optimizer_kwargs: {'lr': 0.001, 'weight_decay': 1e-05}
scheduler_kwargs: {'factor': 0.2, 'patience': 5}
earlystop_kwargs: None

In [5]:
import sys
from pathlib import Path

# Locate the directory containing mymodels.py by walking up from the working
# directory, instead of hard-coding an absolute path that exists on one machine only.
# NOTE(review): assumes the notebook is run from inside the course's
# notebooks/9_practice tree, as the original absolute path implied - confirm.
start_dir = Path.cwd().resolve()
module_dir = next(
    (folder for folder in (start_dir, *start_dir.parents) if (folder / "mymodels.py").is_file()),
    None,
)
if module_dir is None:
    raise FileNotFoundError(f"mymodels.py not found in {start_dir} or any parent directory")

# Guard the append so re-running this cell does not add duplicate sys.path entries.
if str(module_dir) not in sys.path:
    sys.path.append(str(module_dir))

import mymodels

# Reference model with fixed hyperparameters, used for the sanity check below.
model = mymodels.LSTM_HyperModel(input_dim=3, hidden_dim=100, layer_dim=5, output_dim=20).to("cpu")

In [6]:
import torch.optim as optim
from mltrainer import metrics
# Loss and metric are instantiated here; the optimizer is kept as a class (not an instance).
loss_fn = torch.nn.CrossEntropyLoss()
accuracy = metrics.Accuracy()
optimizer = optim.Adam

In [7]:
# Sanity check: score the untrained model on the batch drawn earlier.
x_cpu, y_cpu = x.to("cpu"), y.to("cpu")
yhat = model(x_cpu)
accuracy(y_cpu, yhat)

tensor(0.0625)

In [8]:
import mlflow

# Track runs in a local SQLite database (path is relative to the working directory).
mlflow.set_tracking_uri(uri="sqlite:///mlflow.db")
# Select the experiment that subsequent runs are recorded under.
mlflow.set_experiment(experiment_name="LSTM_hypertuningmodel")

<Experiment: artifact_location='/home/asanchezsa/dl-training/MADS-MachineLearning-course/notebooks/9_practice/rnn_training/mlruns/3', creation_time=1732551876214, experiment_id='3', last_update_time=1732551876214, lifecycle_stage='active', name='LSTM_hypertuningmodel', tags={}>

In [9]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt.pyll import scope

In [10]:
# Directory where trained models are written by `objective`.
modeldir = Path("../../models/lstm").resolve()
if not modeldir.exists():
    # parents=True: on a fresh checkout the intermediate `models` directory may not
    # exist yet, and a plain mkdir() would then raise FileNotFoundError.
    # exist_ok=True: tolerate the directory appearing between the check and the call.
    modeldir.mkdir(parents=True, exist_ok=True)
    print(f"Created {modeldir}")

In [11]:
import torch.optim as optim
from mltrainer import metrics, Trainer, TrainerSettings, ReportTypes
from datetime import datetime

# Objective function for hyperparameter optimization (passed to hyperopt's fmin as `fn`)
def objective(params):
    """Train one LSTM_HyperModel configuration and return its loss for hyperopt.

    Args:
        params: dict of keyword arguments passed unchanged to
            ``mymodels.LSTM_HyperModel`` and logged to MLflow as run parameters.

    Returns:
        dict with ``'loss'`` set to ``trainer.test_loss`` after the training
        loop and ``'status'`` set to ``STATUS_OK``.
    """
    # Every call is tracked as its own MLflow run
    with mlflow.start_run():
        mlflow.set_tag("model", "LSTM")
        mlflow.set_tag("dev", "ASanchezSaavedra")
        # Log the hyperparameters of this trial, plus the notebook-level batch size
        mlflow.log_params(params)
        mlflow.log_param("batch_size", f"{batch_size}")

        # The optimizer and scheduler are handed to Trainer as classes, not instances
        optimizer = optim.Adam
        loss_fn = torch.nn.CrossEntropyLoss()

        # Instantiate the model with the given hyperparameters
        model = mymodels.LSTM_HyperModel(**params)

        # Train with the notebook-level settings and data streamers
        trainer = Trainer(
            model=model,
            settings=settings,
            loss_fn=loss_fn,
            optimizer=optimizer,
            traindataloader=trainstreamer,
            validdataloader=validstreamer,
            scheduler=optim.lr_scheduler.ReduceLROnPlateau,
            device="cpu",
        )
        trainer.loop()

        # Save the trained model under a timestamped name. Seconds are included
        # so that trials finishing within the same minute do not overwrite each
        # other's file.
        tag = datetime.now().strftime("%Y%m%d-%H%M%S")
        modelpath = modeldir / (tag + "model.pt")
        # Serializes the whole model object, not just its state_dict
        torch.save(model, modelpath)

        # Log the saved model as an artifact in MLflow
        mlflow.log_artifact(local_path=modelpath, artifact_path="pytorch_models")
        return {'loss' : trainer.test_loss, 'status': STATUS_OK}

In [15]:
# Search space for `objective`: the keys are the keyword arguments of
# mymodels.LSTM_HyperModel. Input and output sizes are fixed; only the hidden
# size and the number of layers are sampled (quantized, then cast to int).
search_space = dict(
    input_dim=3,
    hidden_dim=scope.int(hp.quniform("hidden_dim", 50, 200, 10)),
    layer_dim=scope.int(hp.quniform("layer_dim", 1, 5, 1)),
    output_dim=20,
)

In [16]:
# Run the search: TPE proposes hyperparameters and `objective` trains and scores them.
best_result = fmin(
    objective,
    space=search_space,
    algo=tpe.suggest,
    trials=Trials(),
    max_evals=3,
)

  0%|          | 0/3 [00:00<?, ?trial/s, best loss=?]

[32m2024-11-25 18:17:26.005[0m | [1mINFO    [0m | [36mmltrainer.trainer[0m:[36mdir_add_timestamp[0m:[36m29[0m - [1mLogging to gestures/20241125-181726[0m
  0%|[38;2;30;71;6m          [0m| 0/10 [00:00<?, ?it/s]
  0%|[38;2;30;71;6m          [0m| 0/81 [00:00<?, ?it/s][A
  7%|[38;2;30;71;6m7         [0m| 6/81 [00:00<00:01, 51.97it/s][A
 15%|[38;2;30;71;6m#4        [0m| 12/81 [00:00<00:01, 44.56it/s][A
 22%|[38;2;30;71;6m##2       [0m| 18/81 [00:00<00:01, 50.04it/s][A
 31%|[38;2;30;71;6m###       [0m| 25/81 [00:00<00:00, 57.06it/s][A
 40%|[38;2;30;71;6m###9      [0m| 32/81 [00:00<00:00, 59.29it/s][A
 49%|[38;2;30;71;6m####9     [0m| 40/81 [00:00<00:00, 64.53it/s][A
 59%|[38;2;30;71;6m#####9    [0m| 48/81 [00:00<00:00, 68.02it/s][A
 69%|[38;2;30;71;6m######9   [0m| 56/81 [00:00<00:00, 69.99it/s][A
 79%|[38;2;30;71;6m#######9  [0m| 64/81 [00:01<00:00, 70.12it/s][A
 89%|[38;2;30;71;6m########8 [0m| 72/81 [00:01<00:00, 70.91it/s][A
 99%|[38;2;30;7

  0%|          | 0/3 [00:11<?, ?trial/s, best loss=?]


KeyboardInterrupt: 