In [1]:
import numpy as np
import mlflow 
from tempfile import TemporaryDirectory
from pathlib import Path 
import json 
# TODO: import mlflow
import mlflow 


In [2]:
# Point the MLflow client at the tracking server running on localhost.
# The address has the form http://127.0.0.1:PORT/ (PORT is usually 5000).
TRACKING_URI = "http://127.0.0.1:5000/"

mlflow.set_tracking_uri(TRACKING_URI)

In [3]:
# Select the "exercise-02" experiment and start a new run named "mlflow-tracking" in it.
# IMPORTANT: Keep the naming of "exercise_2_id", and "tracking_run_id" as they are needed later
exercise_2_id = mlflow.set_experiment("exercise-02").experiment_id

# start_run() hands back an ActiveRun object, not an id; the id string is
# stored on its .info.run_id attribute, which is what we export here.
tracking_run = mlflow.start_run(run_name="mlflow-tracking", experiment_id=exercise_2_id)
tracking_run_id = tracking_run.info.run_id

## LOG Parameters

In [4]:
# Log one parameter on the active run: a learning_rate of 0.01.
mlflow.log_param(key="learning_rate", value=0.01)

0.01

In [5]:
# Several parameters can be logged in one call by passing a dict.
params = dict(epochs=20, num="sigmoid")

mlflow.log_params(params)

## LOG Tags

In [6]:
from mlflow_training import __version__

# Tag the active run with the package version, the environment and the user name.
run_tags = {"environment": "dev", "username": "Florian Krempl"}

# A single tag goes through set_tag(); a dict of tags goes through set_tags().
mlflow.set_tag(key="version", value=__version__)
mlflow.set_tags(run_tags)

## LOG Metrics

In [7]:
# -- Metrics
# A single metric: an F-score of 0.7.
mlflow.log_metric(key="f_score", value=0.7)

# Several metrics at once (accuracy, recall, precision), passed as a dict.
metrics = dict(accuracy=0.98, recall=0.97, precision=0.99)

mlflow.log_metrics(metrics)

In [8]:
import time

# Metrics can also be logged repeatedly while an experiment is running.
# Here a training run is imitated: every batch yields one loss value, taken
# from a log-spaced sequence that falls from 1e4 down to 1.
NUM_BATCHES = 100
SECONDS_PER_BATCH = 0.5

loss_values = np.logspace(4, 0, num=NUM_BATCHES)

for batch_num, loss in enumerate(loss_values):
    # `step` records the batch index alongside each logged loss value.
    mlflow.log_metric(key="loss", value=loss, step=batch_num)
    # Pause between batches so the values arrive spread out over time.
    time.sleep(SECONDS_PER_BATCH)

Open the active MLflow run (`mlflow-tracking`) in the experiment `exercise-02` and select `loss` in the run overview to watch the loss curve update while the values are being logged. 

## Log Artifacts

In [9]:
# Dump the `params` dict to a config.json file inside a temporary directory
# and upload that file to the active run as an artifact.
with TemporaryDirectory() as temp_dir:
    file_path = Path(temp_dir) / "config.json"

    with file_path.open("w") as outfile:
        json.dump(params, outfile)

    # Upload inside the `with` block: the temporary directory (and the file
    # in it) is removed as soon as the block exits.
    mlflow.log_artifact(file_path)

artifact_uri = mlflow.get_artifact_uri()
# Plain string (no f-prefix needed): print() joins the two arguments itself.
print("your artifact is stored here: ", artifact_uri)


your artifact is stored here:  mlflow-artifacts:/264992445669677316/1ec005054aaf41d0a6cdd124526de25c/artifacts


In [10]:
# End the currently active run. The "mlflow-tracking" run was opened with a
# bare mlflow.start_run() call (no `with` block), so it stays active until it
# is ended explicitly here.
mlflow.end_run()

2024/08/27 16:38:43 INFO mlflow.tracking._tracking_service.client: 🏃 View run mlflow-tracking at: http://127.0.0.1:5000/#/experiments/264992445669677316/runs/1ec005054aaf41d0a6cdd124526de25c.
2024/08/27 16:38:43 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/264992445669677316.


### TODO Look at your run on the UI and check out everything you logged: 
- Parameters 
- Tags
- Metrics 
- Artifacts 

## Autolog 
MLflow also provides an autolog function for some frameworks. With scikit-learn this works very well if you just want some basic metrics and logs for your model.

In [11]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor

params = {"n_estimators": 4, "random_state": 42}

# Tiny toy dataset: three identical samples that all share the same target.
features = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]])
targets = np.array([1, 1, 1])

run_name = 'autologging model example'

# Switch scikit-learn autologging on for the upcoming run.
mlflow.sklearn.autolog()
try:
    # The `with` block ends the run automatically when it exits.
    with mlflow.start_run(run_name=run_name) as run:
        rfr = RandomForestRegressor(**params).fit(features, targets)
        print(f"run_id: {run.info.run_id}")
finally:
    # Switch autologging off again even if the run above raised, so that
    # it does not stay enabled for later cells in the same kernel session.
    mlflow.sklearn.autolog(disable=True)


2024/08/27 16:42:31 INFO mlflow.tracking._tracking_service.client: 🏃 View run autologging model example at: http://127.0.0.1:5000/#/experiments/264992445669677316/runs/fbdd4fe2b19b4d56954ac762b9b28950.
2024/08/27 16:42:31 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/264992445669677316.


run_id: fbdd4fe2b19b4d56954ac762b9b28950


## Search Runs 

In [6]:
# Fetch all runs of the "exercise-02" experiment as a DataFrame, order them by
# their logged accuracy (ascending) and show only the id and accuracy columns.
shown_columns = ["run_id", "metrics.accuracy"]

runs_df = mlflow.search_runs(experiment_names=["exercise-02"])
runs_df.sort_values(by="metrics.accuracy").loc[:, shown_columns]

Unnamed: 0,run_id,metrics.accuracy
1,1ec005054aaf41d0a6cdd124526de25c,0.98
3,3a6e1b4a32a742e28c4dc9f90b0e4aca,0.98
4,554b8d4376844382a9843082fd327605,0.98
0,fbdd4fe2b19b4d56954ac762b9b28950,
2,be50375876b147cf83299705401e1c02,
