# Run Sklearn Model and Save to K8s MLflow

In [39]:
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn import datasets
import numpy as np
import joblib
from joblib import dump, load
import mlflow
import os

In [2]:
sklearn.__version__

'0.24.2'

In [3]:
joblib.__version__

'1.0.1'

In [4]:
iris = datasets.load_iris()

In [5]:
iris['data'].shape

(150, 4)

In [6]:
iris['target'].shape

(150,)

In [7]:
np.unique(iris['target'])

array([0, 1, 2])

In [8]:
X = iris['data']
y = iris['target']

In [9]:
X[0]

array([5.1, 3.5, 1.4, 0.2])

In [10]:
clf = RandomForestClassifier(random_state=42)

In [11]:
clf.fit(X, y)

RandomForestClassifier(random_state=42)

In [12]:
clf.score(X, y)

1.0

### Save to MLflow

In [13]:
server_url = "http://localhost:8088"

In [14]:
mlflow.set_tracking_uri(server_url)

In [15]:
mlflow.list_experiments()

[<Experiment: artifact_location='/tmp/0', experiment_id='0', lifecycle_stage='active', name='Default', tags={}>,
 <Experiment: artifact_location='/tmp/1', experiment_id='1', lifecycle_stage='active', name='seldon_getting_started_custom_mlflow2', tags={}>]

In [16]:
experiment_name = "seldon_getting_started_custom_mlflow2"

In [None]:
mlflow.create_experiment(experiment_name)

In [17]:
experiment = mlflow.get_experiment_by_name(experiment_name)
experiment

<Experiment: artifact_location='/tmp/1', experiment_id='1', lifecycle_stage='active', name='seldon_getting_started_custom_mlflow2', tags={}>

In [18]:
mlflow.set_experiment(experiment_name)

In [19]:
mlflow.sklearn.get_default_conda_env()

{'name': 'mlflow-env',
 'channels': ['conda-forge'],
 'dependencies': ['python=3.8.10',
  'pip',
  {'pip': ['mlflow', 'scikit-learn==0.24.2']}]}

In [20]:
mlflow.start_run(experiment_id=experiment.experiment_id, run_name="run3")

<ActiveRun: >

In [21]:
mlflow.log_param("criterion", "test")

In [22]:
mlflow.sklearn.log_model(clf, "model")

In [23]:
mlflow.list_run_infos(experiment.experiment_id)

[<RunInfo: artifact_uri='/tmp/1/660ec993d7304595956ef27e3afedf81/artifacts', end_time=None, experiment_id='1', lifecycle_stage='active', run_id='660ec993d7304595956ef27e3afedf81', run_uuid='660ec993d7304595956ef27e3afedf81', start_time=1629563050745, status='RUNNING', user_id='christian'>,
 <RunInfo: artifact_uri='/tmp/1/0ce7952b59204010b73fdbe2b01b0c17/artifacts', end_time=1629562428232, experiment_id='1', lifecycle_stage='active', run_id='0ce7952b59204010b73fdbe2b01b0c17', run_uuid='0ce7952b59204010b73fdbe2b01b0c17', start_time=1629562425504, status='FINISHED', user_id='christian'>]

In [24]:
experiment.experiment_id

'1'

In [25]:
mlflow.end_run()

In [26]:
print(mlflow.get_artifact_uri())

/tmp/1/7370bde697544da5b6e728ba07cd5fd5/artifacts


In [27]:
pod_name = "mlflow-86c769bddb-4klrz" # hosting mlflow

In [37]:
def save_artifact(pod_name, artifact_experiment):
    cmd = "kubectl cp {1} {0}:/tmp".format(pod_name,experiment.artifact_location)
    print(cmd)
    os.system(cmd)

In [38]:
save_artifact(pod_name, experiment.artifact_location)

kubectl cp /tmp/1 mlflow-86c769bddb-4klrz:/tmp
