In [None]:
import sys

# Make modules under /mnt/code/ importable; the domino_mlflow_utils package
# imported later in this notebook is referenced at /mnt/code/domino_mlflow_utils.
# The membership check keeps this cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
if "/mnt/code/" not in sys.path:
    sys.path.append("/mnt/code/")

In [None]:
import argparse
import os

from filelock import FileLock
from tensorflow.keras.datasets import mnist

import ray
from ray import train, tune
from ray.tune.schedulers import AsyncHyperBandScheduler
#from ray.air.integrations.keras import ReportCheckpointCall

In [None]:
import tensorflow

In [None]:
import torchmetrics as tm

In [None]:
import os
import tempfile
import time

import mlflow
from ray import air, tune
from ray.air import session
from domino_mlflow_utils.mlflow_callback import *

In [None]:
def evaluation_fn(step, width, height):
    """Compute the synthetic score reported by the training loop.

    The score is the reciprocal of ``0.1 + width * step / 100`` plus
    ``height * 0.1``.

    Args:
        step: Current iteration index.
        width: Hyperparameter scaling the step-dependent term.
        height: Hyperparameter contributing a constant offset.

    Returns:
        The score as a float.
    """
    step_term = 0.1 + width * step / 100
    height_offset = height * 0.1
    return step_term ** (-1) + height_offset


def train_function(config):
    """Toy trainable: report a synthetic loss once per step.

    Args:
        config: Mapping with required keys ``"width"`` and ``"height"`` and
            an optional ``"steps"`` key (100 when absent).
    """
    width = config["width"]
    height = config["height"]
    total_steps = config.get("steps", 100)

    for step in range(total_steps):
        # Stand-in for one iteration of an arbitrary training procedure.
        loss = evaluation_fn(step, width, height)
        # Pass the current metrics back to Tune.
        session.report(dict(iterations=step, mean_loss=loss))
        time.sleep(0.1)

In [None]:
def tune_with_callback(mlflow_tracking_uri, experiment_name, finish_fast=False):
    """Run a 5-sample Ray Tune sweep of ``train_function`` with MLflow logging.

    Args:
        mlflow_tracking_uri: Tracking URI handed to ``MyMLflowLoggerCallback``.
        experiment_name: MLflow experiment name handed to the callback.
        finish_fast: When true, each trial runs 5 steps instead of 100.

    Returns:
        The object returned by ``tuner.fit()``. Earlier versions discarded it
        and returned ``None``; callers that ignore the return value are
        unaffected.
    """
    cb = MyMLflowLoggerCallback(
        tracking_uri=mlflow_tracking_uri,
        experiment_name=experiment_name,
        save_artifact=True,
    )
    tuner = tune.Tuner(
        train_function,
        tune_config=tune.TuneConfig(num_samples=5),
        run_config=air.RunConfig(
            name="mlflow",
            callbacks=[cb],
        ),
        param_space={
            "width": tune.randint(10, 100),
            "height": tune.randint(0, 100),
            "steps": 5 if finish_fast else 100,
        },
    )
    try:
        results = tuner.fit()
    finally:
        # Always run the callback's parent-run teardown, even when fit() raises
        # or the cell is interrupted; previously it was skipped on any error.
        # NOTE(review): log_end_parent_run is defined in domino_mlflow_utils and
        # presumably closes the parent MLflow run - confirm against that module.
        cb.log_end_parent_run()
    return results

In [None]:
import mlflow
import os
import ray
import logging
from domino_mlflow_utils.mlflow_callback import *

temp_dir='/tmp'

# Read the Ray head address before the initialization check so that
# service_host/service_port are always defined for the print below. They were
# previously assigned only inside the `if`, which raised NameError whenever
# Ray was already initialized and the names were not yet defined.
service_host = os.environ["RAY_HEAD_SERVICE_HOST"]
service_port = os.environ["RAY_HEAD_SERVICE_PORT"]

if not ray.is_initialized():
    address=f"ray://{service_host}:{service_port}"
    #temp_dir='/mnt/data//{}/'.format(os.environ['DOMINO_PROJECT_NAME']) #set to a dataset
    # Ship the local callback package to the cluster via runtime_env.
    ray.init(address=address, _temp_dir=temp_dir,runtime_env={"py_modules": ['/mnt/code/domino_mlflow_utils']})

print('Ray Initializied')
print(f'Ray Host={service_host} and Ray Port={service_port}')

# 'RAY-TUNE-' + '-' yields a double hyphen after RAY-TUNE; it is kept as-is so
# the name continues to resolve to the same MLflow experiment.
experiment_name = 'RAY-TUNE-'+'-' + os.environ['DOMINO_STARTING_USERNAME'] + '-' + os.environ['DOMINO_PROJECT_NAME']
tune_with_callback(os.environ['CLUSTER_MLFLOW_TRACKING_URI'], experiment_name,finish_fast=True)

#tune_with_setup(os.environ['CLUSTER_MLFLOW_TRACKING_URI'], experiment_name,finish_fast=True)
# get_experiment_by_name returns None when the experiment is not visible from
# the tracking URI this kernel uses; fail with a clear message in that case
# instead of an AttributeError on `.experiment_id`.
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    raise RuntimeError(
        f"MLflow experiment {experiment_name!r} was not found; "
        "check the tracking URI configured for this kernel."
    )
df = mlflow.search_runs(
        [experiment.experiment_id]
    )
print(df)


|    | run_id                           |   experiment_id | status   | artifact_uri                                                        | start_time                       | end_time                         |   metrics.config/steps |   metrics.time_total_s |   metrics.timestamp |   metrics.mean_loss |   metrics.time_this_iter_s |   metrics.iterations |   metrics.config/width |   metrics.config/height |   metrics.iterations_since_restore |   metrics.done |   metrics.time_since_restore |   metrics.training_iteration |   metrics.pid |   params.height |   params.width |   params.steps | tags.mlflow.domino.user   | tags.trial_name            | tags.mlflow.user   | tags.mlflow.domino.run_id   | tags.mlflow.parentRunId          | tags.mlflow.domino.project_id   | tags.mlflow.domino.environment_id   | tags.mlflow.domino.root_run_id   |   tags.mlflow.domino.run_number | tags.mlflow.domino.project_name   | tags.mlflow.domino.user_id   | tags.mlflow.domino.dataset_info                   | tags.mlflow.source.type   | tags.mlflow.domino.environment_revision_id   | tags.mlflow.domino.hardware_tier   | tags.mlflow.runName        |
|---:|:---------------------------------|----------------:|:---------|:--------------------------------------------------------------------|:---------------------------------|:---------------------------------|-----------------------:|-----------------------:|--------------------:|--------------------:|---------------------------:|---------------------:|-----------------------:|------------------------:|-----------------------------------:|---------------:|-----------------------------:|-----------------------------:|--------------:|----------------:|---------------:|---------------:|:--------------------------|:---------------------------|:-------------------|:----------------------------|:---------------------------------|:--------------------------------|:------------------------------------|:---------------------------------|--------------------------------:|:----------------------------------|:-----------------------------|:--------------------------------------------------|:--------------------------|:---------------------------------------------|:-----------------------------------|:---------------------------|
|  0 | efc2ebcf3cb3459ab381c22c0803d4e8 |              12 | FINISHED | mlflow-artifacts:/mlflow/efc2ebcf3cb3459ab381c22c0803d4e8/artifacts | 2024-03-04 12:16:10.514000+00:00 | 2024-03-04 12:16:20.189000+00:00 |                      5 |               0.403112 |         1.70955e+09 |             0.77037 |                   0.100493 |                    4 |                     65 |                       4 |                                  5 |              0 |                     0.403112 |                            5 |          1308 |               4 |             65 |              5 | integration-test          | train_function_ec450_00004 | integration-test   | 65e5b5c446b29e1208590160    | 0a07b8362887418ebbd59a0e851d879d | 65e5557490758361e5bc348d        | 65e5738090758361e5bc34aa            | 0a07b8362887418ebbd59a0e851d879d |                               6 | mlflow-demos                      | 65df309894ef6c5ddd8b2705     | 65e5557b90758361e5bc3492-65e5557b90758361e5bc3491 | NOTEBOOK                  | 65e5aedb90758361e5bc353c                     | small-k8s                          | train_function_ec450_00004 |
|  1 | 87069e82a5654197ae0e5a12b66f9e3f |              12 | FINISHED | mlflow-artifacts:/mlflow/87069e82a5654197ae0e5a12b66f9e3f/artifacts | 2024-03-04 12:15:55.243000+00:00 | 2024-03-04 12:16:06.842000+00:00 |                      5 |               0.403373 |         1.70955e+09 |             4.05641 |                   0.100768 |                    4 |                     95 |                      38 |                                  5 |              0 |                     0.403373 |                            5 |          1308 |              38 |             95 |              5 | integration-test          | train_function_ec450_00002 | integration-test   | 65e5b5c446b29e1208590160    | 0a07b8362887418ebbd59a0e851d879d | 65e5557490758361e5bc348d        | 65e5738090758361e5bc34aa            | 0a07b8362887418ebbd59a0e851d879d |                               6 | mlflow-demos                      | 65df309894ef6c5ddd8b2705     | 65e5557b90758361e5bc3492-65e5557b90758361e5bc3491 | NOTEBOOK                  | 65e5aedb90758361e5bc353c                     | small-k8s                          | train_function_ec450_00002 |
|  2 | 3977b0e4dade4a0192c6cf3d7824eebf |              12 | FINISHED | mlflow-artifacts:/mlflow/3977b0e4dade4a0192c6cf3d7824eebf/artifacts | 2024-03-04 12:15:53.043000+00:00 | 2024-03-04 12:16:07.640000+00:00 |                      5 |               0.402544 |         1.70955e+09 |             6.28249 |                   0.100624 |                    4 |                     86 |                      60 |                                  5 |              0 |                     0.402544 |                            5 |          4872 |              60 |             86 |              5 | integration-test          | train_function_ec450_00003 | integration-test   | 65e5b5c446b29e1208590160    | 0a07b8362887418ebbd59a0e851d879d | 65e5557490758361e5bc348d        | 65e5738090758361e5bc34aa            | 0a07b8362887418ebbd59a0e851d879d |                               6 | mlflow-demos                      | 65df309894ef6c5ddd8b2705     | 65e5557b90758361e5bc3492-65e5557b90758361e5bc3491 | NOTEBOOK                  | 65e5aedb90758361e5bc353c                     | small-k8s                          | train_function_ec450_00003 |
|  3 | 4b4467fc71144b91a5726ad73afa4fee |              12 | FINISHED | mlflow-artifacts:/mlflow/4b4467fc71144b91a5726ad73afa4fee/artifacts | 2024-03-04 12:15:50.729000+00:00 | 2024-03-04 12:16:13.547000+00:00 |                      5 |               0.403404 |         1.70955e+09 |             3.17619 |                   0.100774 |                    4 |                     50 |                      27 |                                  5 |              0 |                     0.403404 |                            5 |          1380 |              27 |             50 |              5 | integration-test          | train_function_ec450_00001 | integration-test   | 65e5b5c446b29e1208590160    | 0a07b8362887418ebbd59a0e851d879d | 65e5557490758361e5bc348d        | 65e5738090758361e5bc34aa            | 0a07b8362887418ebbd59a0e851d879d |                               6 | mlflow-demos                      | 65df309894ef6c5ddd8b2705     | 65e5557b90758361e5bc3492-65e5557b90758361e5bc3491 | NOTEBOOK                  | 65e5aedb90758361e5bc353c                     | small-k8s                          | train_function_ec450_00001 |
|  4 | 9cd9fc01aa2b4dc19974cc564bc740bc |              12 | FINISHED | mlflow-artifacts:/mlflow/9cd9fc01aa2b4dc19974cc564bc740bc/artifacts | 2024-03-04 12:15:50.332000+00:00 | 2024-03-04 12:16:17.236000+00:00 |                      5 |               0.403019 |         1.70955e+09 |            10.2155  |                   0.100751 |                    4 |                     46 |                      97 |                                  5 |              0 |                     0.403019 |                            5 |          1022 |              97 |             46 |              5 | integration-test          | train_function_ec450_00000 | integration-test   | 65e5b5c446b29e1208590160    | 0a07b8362887418ebbd59a0e851d879d | 65e5557490758361e5bc348d        | 65e5738090758361e5bc34aa            | 0a07b8362887418ebbd59a0e851d879d |                               6 | mlflow-demos                      | 65df309894ef6c5ddd8b2705     | 65e5557b90758361e5bc3492-65e5557b90758361e5bc3491 | NOTEBOOK                  | 65e5aedb90758361e5bc353c                     | small-k8s                          | train_function_ec450_00000 |
|  5 | 0a07b8362887418ebbd59a0e851d879d |              12 | RUNNING  | mlflow-artifacts:/mlflow/0a07b8362887418ebbd59a0e851d879d/artifacts | 2024-03-04 12:15:47.396000+00:00 | NaT                              |                    nan |             nan        |       nan           |           nan       |                 nan        |                  nan |                    nan |                     nan |                                nan |            nan |                   nan        |                          nan |           nan |                 |                |                | integration-test          |                            | integration-test   | 65e5b5c446b29e1208590160    |                                  | 65e5557490758361e5bc348d        | 65e5738090758361e5bc34aa            |                                  |                               6 | mlflow-demos                      | 65df309894ef6c5ddd8b2705     | 65e5557b90758361e5bc3492-65e5557b90758361e5bc3491 | NOTEBOOK                  | 65e5aedb90758361e5bc353c                     | small-k8s                          | root-2024-03-04 12:15:47   |