# Demonstration of the MLflow REST API

This notebook demonstrates how to use the MLflow tracking REST API to retrieve results from MLflow experiments and place them into a pandas DataFrame.

In [7]:
from __future__ import print_function

In [8]:
import pandas as pd
import numpy as np
import os
import os.path
import requests
import socket

from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet

import mlflow
import mlflow.tracking
import mlflow.sklearn

## Set up to invoke the MLflow tracking REST API

In [9]:
# Name of the machine running the tracking server
# (socket.gethostname() can be substituted to target the local host).
host_name = 'mlflow_server'

# Server root and the prefix shared by every REST endpoint used below.
BASE_URL = 'http://{0}:5000'.format(host_name)
REST_API_URL = '{0}/api/2.0/preview/mlflow'.format(BASE_URL)

print(BASE_URL, REST_API_URL)

http://mlflow_server:5000 http://mlflow_server:5000/api/2.0/preview/mlflow


## List all experiments

In [21]:
# Ask the tracking server for the list of experiments.
r = requests.get(REST_API_URL + '/experiments/list')
# Stop here with the HTTP status if the server rejected the request,
# instead of failing later while decoding an unexpected payload.
r.raise_for_status()
experiment_list = r.json()
experiment_list

{'experiments': [{'artifact_location': '/artifacts/0',
   'lifecycle_stage': 'active',
   'name': 'mlflow_demo1',
   'experiment_id': '0'},
  {'artifact_location': '/artifacts/1',
   'lifecycle_stage': 'active',
   'name': 'mlflow_demo2',
   'experiment_id': '1'}]}

## Retrieve data for specified experiment

In [22]:
# Id of the experiment examined in the rest of the notebook.
EXPERIMENT_ID = '1'

# Fetch the description of that experiment from the tracking server.
r = requests.get(REST_API_URL + '/experiments/get',
                 json={'experiment_id': EXPERIMENT_ID})
# Stop here with the HTTP status if the server rejected the request,
# instead of failing later on a missing key.
r.raise_for_status()
experiment_data = r.json()


In [23]:
# Show the 'experiment' entry of the response fetched above.
experiment_data['experiment']

{'artifact_location': '/artifacts/1',
 'lifecycle_stage': 'active',
 'name': 'mlflow_demo2',
 'experiment_id': '1'}

In [24]:
# Identifying attributes of the experiment, kept together for reuse.
_experiment = experiment_data['experiment']
experiment_metadata = dict(experiment_name=_experiment['name'],
                           experiment_id=_experiment['experiment_id'])

## Extract experiment results to pandas dataframe

### Retrieve desired experiment attributes

In [25]:
# One-row frame holding just the experiment's name and id.
experiment_attrs = experiment_data['experiment']
df1 = pd.DataFrame([dict((field, experiment_attrs[field])
                         for field in ('name', 'experiment_id'))])
df1

Unnamed: 0,experiment_id,name
0,1,mlflow_demo2


### Extract run data (metrics, params and tags) 

In [26]:
def extract_run_data(r):
    """Build one flat dataframe row describing a single run.

    The row starts as a copy of the module-level ``experiment_metadata``,
    gains the run's ``run_uuid``, and then one column per entry returned by
    the ``/runs/get`` endpoint. Columns are named ``<section>_<key>``, for
    example ``metrics_r2`` or ``params_alpha``.

    Parameters
    ----------
    r : dict
        Run description; only its ``'run_uuid'`` entry is read.

    Returns
    -------
    dict
        Mapping of column name to value for this run.

    Raises
    ------
    requests.HTTPError
        If the tracking server answers the ``/runs/get`` request with an
        error status.
    """
    # seed dataframe row with experiment attributes
    row = experiment_metadata.copy()

    # add run uuid
    run_uuid = r['run_uuid']
    row['run_uuid'] = run_uuid

    # retrieve the run's data sections from the tracking server
    response = requests.get(REST_API_URL + '/runs/get',
                            json={'run_uuid': run_uuid})
    # surface HTTP failures directly instead of as a KeyError further down
    response.raise_for_status()
    run_data = response.json()['run']['data']

    # flatten every section into "<section>_<key>" columns
    for section, entries in run_data.items():
        try:
            columns = {section + '_' + entry['key']: entry['value']
                       for entry in entries}
        except (KeyError, TypeError):
            # The section is not a list of {'key': ..., 'value': ...} records;
            # it contributes no columns. Only these shape errors are skipped,
            # so interrupts and unrelated failures still propagate.
            columns = {}

        row.update(columns)

    # return the dataframe row
    return row

In [27]:
# Collect one row per run first, then build the frame in a single step.
run_rows = [extract_run_data(run) for run in experiment_data['runs']]
df = pd.DataFrame(run_rows)
df

Unnamed: 0,experiment_id,experiment_name,metrics_r2,params_algorithm,params_alpha,params_data_set_type,params_hidden_layer_sizes,params_l1_ratio,params_lambda,params_learning_rate,params_max_depth,params_max_iter,params_n_estimators,params_random_state,run_uuid,tags_data_set_type,tags_estimator,tags_mlflow.note.content
0,1,mlflow_demo2,0.377378,RandomForestRegressor,,Min/Max values,,,,,7.0,,279.0,13.0,eb8c1041b860417bb2d6c784220ea115,Min/Max values,from sklearn.ensemble import RandomForestRegre...,
1,1,mlflow_demo2,0.377038,RandomForestRegressor,,raw values,,,,,7.0,,279.0,13.0,c365b150023649659b46b92287c7d073,raw values,from sklearn.ensemble import RandomForestRegre...,
2,1,mlflow_demo2,0.500909,XGBRegressor,,standardized values,,,,0.0585124918820747,6.0,,453.0,13.0,d19419a5805540ca8973e1ddd52b366c,standardized values,from xgboost import XGBRegressor,
3,1,mlflow_demo2,0.220901,ElasticNet,0.0094127700809694,Min/Max values,,0.3583337827049697,,,,,,13.0,035650ed17164d549670075170f9fd6e,Min/Max values,from sklearn.linear_model import ElasticNet,
4,1,mlflow_demo2,0.25019,ElasticNet,0.0094127700809694,standardized values,,0.3583337827049697,,,,,,13.0,51781a5b618c4500982c20acb7f8f194,standardized values,from sklearn.linear_model import ElasticNet,
5,1,mlflow_demo2,0.341047,ExtraTreesRegressor,,standardized values,,,,,7.0,,279.0,13.0,aed4f58dc3834207875aeb6ce36883f8,standardized values,from sklearn.ensemble import ExtraTreesRegressor,
6,1,mlflow_demo2,0.118223,MLPRegressor,,raw values,"(10,)",,,,,1422.0,,13.0,25d4a525afd04236b0e388b50361b53f,raw values,from sklearn.neural_network import MLPRegressor,
7,1,mlflow_demo2,0.229264,ElasticNet,0.0094127700809694,raw values,,0.3583337827049697,,,,,,13.0,5bc6ef71ec354cd0aee74b0f78cdc518,raw values,from sklearn.linear_model import ElasticNet,
8,1,mlflow_demo2,0.315244,MLPRegressor,,standardized values,"(10, 10)",,,,,1343.0,,13.0,53d8f661a64a43c5baa485d6d55d7e39,standardized values,from sklearn.neural_network import MLPRegressor,
9,1,mlflow_demo2,0.1476,r-glmnet,0.5,r_raw_data_set,,,0.5,,,,,,23283ef718f34105b2ef26f0e3f3d3f2,r_raw_data_set,glmnet,This is an R model


In [28]:
# Parameter columns to convert to a numeric dtype.
numeric_columns = ['params_alpha','params_l1_ratio','params_learning_rate','params_max_depth']
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric)

In [29]:
# Inspect the dtype of every column in the run table.
df.dtypes

experiment_id                 object
experiment_name               object
metrics_r2                   float64
params_algorithm              object
params_alpha                 float64
params_data_set_type          object
params_hidden_layer_sizes     object
params_l1_ratio              float64
params_lambda                 object
params_learning_rate         float64
params_max_depth             float64
params_max_iter               object
params_n_estimators           object
params_random_state           object
run_uuid                      object
tags_data_set_type            object
tags_estimator                object
tags_mlflow.note.content      object
dtype: object

## Display experiment results sorted by metric

In [30]:
# Order the runs by the metrics_r2 column, highest value first.
df.sort_values(by='metrics_r2', ascending=False)

Unnamed: 0,experiment_id,experiment_name,metrics_r2,params_algorithm,params_alpha,params_data_set_type,params_hidden_layer_sizes,params_l1_ratio,params_lambda,params_learning_rate,params_max_depth,params_max_iter,params_n_estimators,params_random_state,run_uuid,tags_data_set_type,tags_estimator,tags_mlflow.note.content
2,1,mlflow_demo2,0.500909,XGBRegressor,,standardized values,,,,0.058512,6.0,,453.0,13.0,d19419a5805540ca8973e1ddd52b366c,standardized values,from xgboost import XGBRegressor,
15,1,mlflow_demo2,0.500365,XGBRegressor,,raw values,,,,0.058512,6.0,,453.0,13.0,30c02b7bd3a44756b75b63c393f961a2,raw values,from xgboost import XGBRegressor,
12,1,mlflow_demo2,0.499639,XGBRegressor,,Min/Max values,,,,0.058512,6.0,,453.0,13.0,6e1eb35633bb456593d48e93a6181cf4,Min/Max values,from xgboost import XGBRegressor,
11,1,mlflow_demo2,0.37756,RandomForestRegressor,,standardized values,,,,,7.0,,279.0,13.0,5a81dd115f324387925efe5cf55703b6,standardized values,from sklearn.ensemble import RandomForestRegre...,
0,1,mlflow_demo2,0.377378,RandomForestRegressor,,Min/Max values,,,,,7.0,,279.0,13.0,eb8c1041b860417bb2d6c784220ea115,Min/Max values,from sklearn.ensemble import RandomForestRegre...,
1,1,mlflow_demo2,0.377038,RandomForestRegressor,,raw values,,,,,7.0,,279.0,13.0,c365b150023649659b46b92287c7d073,raw values,from sklearn.ensemble import RandomForestRegre...,
5,1,mlflow_demo2,0.341047,ExtraTreesRegressor,,standardized values,,,,,7.0,,279.0,13.0,aed4f58dc3834207875aeb6ce36883f8,standardized values,from sklearn.ensemble import ExtraTreesRegressor,
14,1,mlflow_demo2,0.341047,ExtraTreesRegressor,,raw values,,,,,7.0,,279.0,13.0,11386cbcc3da432e94f3549c7ffa01d9,raw values,from sklearn.ensemble import ExtraTreesRegressor,
16,1,mlflow_demo2,0.341047,ExtraTreesRegressor,,Min/Max values,,,,,7.0,,279.0,13.0,8ceaecde057c4098b768db36c80dca43,Min/Max values,from sklearn.ensemble import ExtraTreesRegressor,
8,1,mlflow_demo2,0.315244,MLPRegressor,,standardized values,"(10, 10)",,,,,1343.0,,13.0,53d8f661a64a43c5baa485d6d55d7e39,standardized values,from sklearn.neural_network import MLPRegressor,


In [31]:
# (number of rows, number of columns) of the run table.
df.shape

(17, 18)