Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add MLflow logger #87

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,10 @@ python -m torch.distributed.launch --nproc_per_node 8 --master_port 9527 train_d

Under construction.

## Loggers

Check the [YOLOv9 loggers documentation](utils/loggers/README.md).


## Citation

Expand Down
59 changes: 59 additions & 0 deletions utils/loggers/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Loggers

## Supported platforms

* [TensorBoard](https://www.tensorflow.org/tensorboard)
* [Weights & Biases](https://wandb.ai/)
* [ClearML](https://clear.ml/)
* [Comet](https://www.comet.com/)
* [MLflow](https://mlflow.org/)

## How to use?

### TensorBoard

TODO

### Weights & Biases

TODO

### ClearML

TODO

### Comet

TODO

### MLflow

MLflow is an open-source platform for monitoring and managing machine learning experiments.

1. __Prerequisites__

Make sure you have installed the MLflow library:

pip install mlflow

2. __MLflow server__

Launch your MLflow server. You can run it with the following command:

mlflow server --backend-store-uri mlflow_server

This will start a local server at http://127.0.0.1:5000 by default and save all mlflow logs to the `mlflow_server` directory at the location of the command execution.

To kill all running MLflow processes, you can run this command:

ps aux | grep 'mlflow' | grep -v 'grep' | awk '{print $2}' | xargs kill -9

3. __MLflow parameters__

Set your server address in the `MLFLOW_TRACKING_URI` environment variable. If the address is not provided, a warning will be raised and the run will not be recorded.

Set the name of your experiment in the `MLFLOW_EXPERIMENT_NAME` environment variable. If no name is provided, the project name (--project of train.py) will be set by default.

Define the name of your run in the `MLFLOW_RUN` environment variable. If no name is provided, the run name (--name of train.py) will be set by default.

After that, your training sessions will be saved in your MLflow server!
33 changes: 32 additions & 1 deletion utils/loggers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from utils.plots import plot_images, plot_labels, plot_results
from utils.torch_utils import de_parallel

LOGGERS = ('csv', 'tb', 'wandb', 'clearml', 'comet') # *.csv, TensorBoard, Weights & Biases, ClearML
LOGGERS = ('csv', 'tb', 'wandb', 'clearml', 'comet', 'mlflow')  # *.csv, TensorBoard, Weights & Biases, ClearML, Comet, MLflow
RANK = int(os.getenv('RANK', -1))

try:
Expand Down Expand Up @@ -48,6 +48,14 @@
except (ModuleNotFoundError, ImportError, AssertionError):
comet_ml = None

try:
import mlflow

assert hasattr(mlflow, '__version__') # verify package import not local dir
from utils.loggers.mlflow import MLflowLogger
except (ImportError, AssertionError):
mlflow = None


class Loggers():
# YOLO Loggers class
Expand Down Expand Up @@ -91,6 +99,10 @@ def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None,
prefix = colorstr('Comet: ')
s = f"{prefix}run 'pip install comet_ml' to automatically track and visualize YOLO 🚀 runs in Comet"
self.logger.info(s)
if not mlflow:
prefix = colorstr('MLflow: ')
s = f"{prefix}run 'pip install mlflow' to automatically track and visualize YOLO 🚀 runs in MLflow"
self.logger.info(s)
# TensorBoard
s = self.save_dir
if 'tb' in self.include and not self.opt.evolve:
Expand Down Expand Up @@ -129,6 +141,12 @@ def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None,
else:
self.comet_logger = None

# MLflow
if mlflow and 'mlflow' in self.include:
self.mlflow = MLflowLogger(self.opt, self.hyp)
else:
self.mlflow = None

@property
def remote_dataset(self):
# Get data_dict if custom dataset artifact link is provided
Expand All @@ -139,6 +157,8 @@ def remote_dataset(self):
data_dict = self.wandb.data_dict
if self.comet_logger:
data_dict = self.comet_logger.data_dict
if self.mlflow:
data_dict = self.mlflow.data_dict

return data_dict

Expand All @@ -161,6 +181,8 @@ def on_pretrain_routine_end(self, labels, names):
# pass # ClearML saves these images automatically using hooks
if self.comet_logger:
self.comet_logger.on_pretrain_routine_end(paths)
if self.mlflow:
self.mlflow.on_pretrain_routine_end(paths)

def on_train_batch_end(self, model, ni, imgs, targets, paths, vals):
log_dict = dict(zip(self.keys[0:3], vals))
Expand Down Expand Up @@ -250,6 +272,9 @@ def on_fit_epoch_end(self, vals, epoch, best_fitness, fi):
if self.comet_logger:
self.comet_logger.on_fit_epoch_end(x, epoch=epoch)

if self.mlflow:
self.mlflow.on_fit_epoch_end(x, epoch=epoch)

def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
# Callback runs on model save event
if (epoch + 1) % self.opt.save_period == 0 and not final_epoch and self.opt.save_period != -1:
Expand All @@ -263,6 +288,9 @@ def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
if self.comet_logger:
self.comet_logger.on_model_save(last, epoch, final_epoch, best_fitness, fi)

if self.mlflow and not final_epoch:
self.mlflow.on_model_save(last)

def on_train_end(self, last, best, epoch, results):
# Callback runs on training end, i.e. saving best model
if self.plots:
Expand Down Expand Up @@ -295,6 +323,9 @@ def on_train_end(self, last, best, epoch, results):
final_results = dict(zip(self.keys[3:10], results))
self.comet_logger.on_train_end(files, self.save_dir, last, best, epoch, final_results)

if self.mlflow:
self.mlflow.on_train_end(self.save_dir)

def on_params_update(self, params: dict):
# Update hyperparams or configs of the experiment
if self.wandb:
Expand Down
79 changes: 79 additions & 0 deletions utils/loggers/mlflow/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import os
from utils.general import LOGGER, colorstr
from pathlib import Path

try:
import mlflow
except (ModuleNotFoundError, ImportError):
mlflow = None

PREFIX = colorstr("MLflow: ")
def SANITIZE(x):
    """Return a copy of dict *x* with ':' and ';' in keys replaced by '_'.

    MLflow restricts the characters allowed in metric/param keys, so keys
    are sanitized before being sent to the tracking server.
    """
    return {k.replace(':', '_').replace(';', '_'): v for k, v in x.items()}


class MLflowLogger:
    """Log metrics, parameters, models and much more with MLflow."""

    def __init__(self, opt, hyp) -> None:
        """Store run configuration; the MLflow run itself is started in on_pretrain_routine_end.

        Args:
            opt: argparse Namespace of training options (opt.save_dir is used for naming).
            hyp: dict of hyperparameters to log as run params.
        """
        self.mlflow = mlflow  # module handle, or None when the package is missing / init failed
        self.data_dict = None  # no remote-dataset support; kept for Loggers API parity
        self.opt = opt
        self.hyp = hyp

    def on_pretrain_routine_end(self, paths):
        """Start the MLflow run, log opt/hyp params and upload pre-training plot artifacts.

        Args:
            paths: iterable of plot file paths produced before training starts.
        """
        if not self.mlflow:  # nothing to do if mlflow is unavailable
            return

        uri = os.environ.get("MLFLOW_TRACKING_URI")
        LOGGER.info(f"{PREFIX} tracking uri: {uri}")
        mlflow.set_tracking_uri(uri)
        # Fall back to save_dir components (e.g. runs/train/exp -> experiment 'train', run 'exp')
        experiment_name = os.environ.get("MLFLOW_EXPERIMENT_NAME") or self.opt.save_dir.split('/')[-2]
        run_name = os.environ.get("MLFLOW_RUN") or self.opt.save_dir.split('/')[-1]
        mlflow.set_experiment(experiment_name)
        mlflow.pytorch.autolog()

        try:
            active_run = mlflow.active_run() or mlflow.start_run(run_name=run_name)
            LOGGER.info(f"{PREFIX}logging run_id({active_run.info.run_id}) to {uri}")

            # Don't save hyps from opt directly
            mlflow.log_params({f"param/{key}": value for key, value in vars(self.opt).items() if key != 'hyp'})
            mlflow.log_params({f"hyp/{key}": value for key, value in self.hyp.items()})

        except Exception as e:
            LOGGER.warning(f"{PREFIX}WARNING ⚠️ Failed to initialize: {e}\n" f"{PREFIX}WARNING ⚠️ Not tracking this run")
            self.mlflow = None
            return  # bug fix: don't attempt artifact uploads after a failed initialization

        for path in paths:
            mlflow.log_artifact(str(path))

    def on_fit_epoch_end(self, vals, epoch):
        """Log training metrics at the end of each fit epoch to MLflow."""
        if self.mlflow:
            mlflow.log_metrics(metrics=SANITIZE(vals), step=epoch)

    def on_model_save(self, last):
        """Upload the weights directory containing checkpoint *last* as run artifacts."""
        if self.mlflow:
            mlflow.log_artifacts(os.path.dirname(last), artifact_path="weights")

    def on_train_end(self, save_dir):
        """Log all result artifacts from *save_dir* and close the MLflow run."""
        if self.mlflow:
            mlflow.log_artifacts(save_dir)

            mlflow.end_run()

            LOGGER.info(
                f"{PREFIX}results logged to {mlflow.get_tracking_uri()}\n"
            )