In [3]:
import getpass
import os

import mlflow
import numpy as np
from sklearn.tree import DecisionTreeClassifier

In [4]:
# mlflow_util.py
import subprocess
import os

def get_git_revision_hash():
    """Return the raw output of ``git rev-parse HEAD``.

    The bytes produced by the command are returned unmodified (not decoded,
    not stripped). ``subprocess.CalledProcessError`` propagates when git
    exits with a non-zero status.
    """
    command = ['git', 'rev-parse', 'HEAD']
    revision = subprocess.check_output(command)
    return revision

def get_git_branch():
    """Return the raw output of ``git branch --show-current``.

    The bytes produced by the command are returned unmodified (not decoded,
    not stripped). ``subprocess.CalledProcessError`` propagates when git
    exits with a non-zero status.
    """
    command = ['git', 'branch', '--show-current']
    branch = subprocess.check_output(command)
    return branch

def get_git_remote():
    """Return the raw output of ``git config --get remote.origin.url``.

    The bytes produced by the command are returned unmodified (not decoded,
    not stripped). ``subprocess.CalledProcessError`` propagates when git
    exits with a non-zero status.
    """
    command = ['git', 'config', '--get', 'remote.origin.url']
    remote_url = subprocess.check_output(command)
    return remote_url

def get_pip_freeze():
    """Return the installed packages of the running interpreter, one entry per line.

    pip is invoked as ``<this interpreter> -m pip freeze`` rather than through
    whichever ``pip`` executable happens to be first on PATH, so the listing
    describes the environment this code is actually running in.

    Returns:
        list[bytes]: the lines of the ``pip freeze`` output, without line endings.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    import sys

    freeze_output = subprocess.check_output([sys.executable, '-m', 'pip', 'freeze'])
    return freeze_output.splitlines()

# Line terminator as bytes, for writing into files opened in binary mode.
new_line = "\n".encode("UTF-8")

def record_libraries(mlflow):
    """Log the current ``pip freeze`` listing to MLflow as ``pip_freeze.txt``.

    The listing is written into a private temporary directory instead of the
    working directory, so an existing ``pip_freeze.txt`` next to the notebook
    is never overwritten or deleted, and the temporary file is removed even
    when ``log_artifact`` raises.

    Args:
        mlflow: Object exposing ``log_artifact(local_path)`` (the notebook
            passes the ``mlflow`` module).
    """
    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        # Keep the original file name so the artifact is still called pip_freeze.txt.
        freeze_path = os.path.join(tmp_dir, "pip_freeze.txt")
        with open(freeze_path, "wb") as file:
            for line in get_pip_freeze():
                file.write(line)
                file.write(new_line)
        # Log while the directory still exists; it is deleted when the block exits.
        mlflow.log_artifact(freeze_path)
    
def log_metric(mlflow, metric_name, metric_value):
    """Record one metric through ``mlflow.tracking.fluent.log_metric``.

    Args:
        mlflow: Object exposing ``tracking.fluent.log_metric`` (the notebook
            passes the ``mlflow`` module).
        metric_name: Forwarded as the metric's ``key``.
        metric_value: Forwarded as the metric's ``value``.
    """
    fluent_api = mlflow.tracking.fluent
    fluent_api.log_metric(key=metric_name, value=metric_value)
    
def log_param(mlflow, param_name, param_value):
    """Record one parameter through ``mlflow.tracking.fluent.log_param``.

    Args:
        mlflow: Object exposing ``tracking.fluent.log_param`` (the notebook
            passes the ``mlflow`` module).
        param_name: Forwarded as the parameter's ``key``.
        param_value: Forwarded as the parameter's ``value``.
    """
    fluent_api = mlflow.tracking.fluent
    fluent_api.log_param(key=param_name, value=param_value)

## Prepare training data

In [5]:
# Four samples with two features each.
X = np.array([
    [1, 1],
    [1, 2],
    [2, 2],
    [2, 3],
])
# Target is the linear combination 1*x0 + 2*x1 + 3 of each row.
y = X.dot(np.array([1, 2])) + 3

## Setup Mlflow

Set the location of the MLflow server here. Because both your notebook and the MLflow server are running on Kubernetes, we simply use the address of the Kubernetes Service.

We are using our local MinIO server as the S3 storage, and therefore pass its settings through environment variables, such as AWS_SECRET_ACCESS_KEY, which contains the password.

## Experiment Name

The experiment name is an important variable: all of your experiment runs are stored under it on the MLflow server.

In [6]:
# Tracking server URL (a cluster-local Kubernetes Service address).
HOST = "http://mlflow-tracking.default.svc.cluster.local:80"
EXPERIMENT_NAME = "HelloMlFlow"
# Backward-compatible alias for the original, misspelled name in case other cells still use it.
EXPREIMENT_NAME = EXPERIMENT_NAME

# Non-secret connection settings.
os.environ['MLFLOW_S3_ENDPOINT_URL'] = "http://mlflow-minio.default.svc.cluster.local"
os.environ['AWS_REGION'] = 'ap-northeast-2'
os.environ['AWS_BUCKET_NAME'] = 'dksshddl-data'
os.environ['MLFLOW_TRACKING_USERNAME'] = 'user'

# Never store the password in the notebook: use the value already present in
# the environment, or prompt for it interactively when it is not set.
if not os.environ.get('MLFLOW_TRACKING_PASSWORD'):
    os.environ['MLFLOW_TRACKING_PASSWORD'] = getpass.getpass("MLflow tracking password: ")

mlflow.set_tracking_uri(HOST)
mlflow.set_experiment(EXPERIMENT_NAME)
# Enable automatic logging for scikit-learn estimators.
mlflow.sklearn.autolog()

In [12]:
# Decision tree hyperparameters, listed one per line so they are easy to tune.
model = DecisionTreeClassifier(
    criterion='gini',
    max_depth=5,
    min_samples_split=10,
    min_samples_leaf=3,
)


## Adding custom tags to the run

The MLflow API lets you associate custom tags with a run, as shown below.

record_libraries is a custom function that runs the pip freeze command and stores its output as a file in the MLflow run. You can find this function in the associated mlflow_util module in this repo.

log_metric and log_param are utility functions that store the provided values in MLflow. Note that log_metric takes a float as its value.

In [15]:
# Custom tags attached to the run.
run_tags = {
    "hello": "cjhyun",
    "test": "tag",
}
# Environment variable whose value is recorded as a run parameter.
s3_endpoint_var = "MLFLOW_S3_ENDPOINT_URL"

with mlflow.start_run(tags=run_tags) as run:
    # Fit inside the run context so that everything below is logged against this run.
    model.fit(X, y)
    # Attach the `pip freeze` snapshot of the environment as a run artifact.
    record_libraries(mlflow)
    # Metric values are floats; the key was previously misspelled "custom_mteric".
    log_metric(mlflow, "custom_metric", 1.0)
    # The parameter key is the lower-cased variable name; the value is its current setting.
    log_param(mlflow, s3_endpoint_var.lower(), os.environ[s3_endpoint_var])

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


🏃 View run overjoyed-deer-203 at: http://mlflow-tracking.default.svc.cluster.local:80/#/experiments/1/runs/1d777626e8264b56ad640983017ae03e
🧪 View experiment at: http://mlflow-tracking.default.svc.cluster.local:80/#/experiments/1


In [14]:
def masked_env(environ, secret_markers=("PASSWORD", "SECRET", "TOKEN", "KEY")):
    """Format environment variables as ``NAME=value`` lines with secrets masked.

    A bare ``!env`` prints every variable verbatim, so credentials such as
    ``MLFLOW_TRACKING_PASSWORD`` end up in the saved notebook output. Any
    variable whose name contains one of ``secret_markers`` (case-insensitive)
    is shown as ``***`` instead; this deliberately errs on the side of masking.

    Args:
        environ: Mapping of variable names to values (e.g. ``os.environ``).
        secret_markers: Substrings that mark a variable name as sensitive.

    Returns:
        list[str]: one ``NAME=value`` line per variable, sorted by name.
    """
    lines = []
    for name in sorted(environ):
        hidden = any(marker in name.upper() for marker in secret_markers)
        lines.append(f"{name}={'***' if hidden else environ[name]}")
    return lines

print("\n".join(masked_env(os.environ)))

MLFLOW_POSTGRESQL_PORT_5432_TCP=tcp://172.20.57.198:5432
SPARK_PI_UI_SVC_SERVICE_PORT=4040
MINIO_FOR_MLOPS_CONSOLE_PORT_9090_TCP_PORT=9090
SPARK_PI_UI_SVC_PORT=tcp://172.20.155.221:4040
PROXY_PUBLIC_SERVICE_HOST=172.20.162.22
KUBERNETES_SERVICE_PORT=443
HUB_PORT=tcp://172.20.124.11:8081
KUBERNETES_PORT=tcp://172.20.0.1:443
HUB_SERVICE_PORT=8081
MINIO_FOR_MLOPS_CONSOLE_PORT_9090_TCP_PROTO=tcp
PROXY_API_PORT_8001_TCP_PORT=8001
JUPYTERHUB_PUBLIC_URL=
JUPYTERHUB_OAUTH_SCOPES=["access:servers!server=admin/", "access:servers!user=admin"]
PROXY_API_PORT_8001_TCP_PROTO=tcp
MINIO_FOR_MLOPS_CONSOLE_PORT=tcp://172.20.226.193:9090
MINIO_FOR_MLOPS_CONSOLE_SERVICE_PORT=9090
MPLBACKEND=module://matplotlib_inline.backend_inline
HOSTNAME=jupyter-admin
JUPYTERHUB_HOST=
MINIO_SERVICE_PORT=80
MINIO_PORT=tcp://172.20.198.157:80
MLFLOW_S3_ENDPOINT_URL=http://mlflow-minio.default.svc.cluster.local
JUPYTERHUB_USER=admin
MLFLOW_MINIO_PORT_9001_TCP_ADDR=172.20.200.162
MLFLOW_TRACKING_PORT_80_TCP_ADDR=172.20.176