In [1]:
from urllib.parse import urlparse
from urllib.request import url2pathname

import mlflow
import pandas as pd

def get_parameters_list(data):
    """Return the ``model`` and ``dataset`` run parameters as a two-item list.

    ``data`` is indexed with ``'params'`` and the resulting mapping with
    ``'model'`` and ``'dataset'``; a missing key raises ``KeyError``.
    """
    run_params = data['params']
    return [run_params[key] for key in ('model', 'dataset')]


def get_metrics_list(data):
    """Return ``[accuracy, f1_score]`` for a run, using ``'-'`` for anything missing.

    Parameters
    ----------
    data : dict
        Run data mapping; its optional ``'metrics'`` entry maps metric names
        to values.

    Returns
    -------
    list
        Two items. Each is the logged metric value, or the placeholder ``'-'``
        when the run has no metrics at all or did not log that particular
        metric.
    """
    metrics = data.get('metrics')
    if not metrics:
        return ['-', '-']
    # A run may have logged only some metrics; fall back per metric instead of
    # raising KeyError so one incomplete run does not abort the whole table.
    return [metrics.get('accuracy', '-'), metrics.get('f1_score', '-')]

In [2]:
# Summarise every experiment returned by mlflow: name, artifact location and lifecycle stage
all_experiments = mlflow.list_experiments()
exp_data = [
    [experiment.name, experiment.artifact_location, experiment.lifecycle_stage]
    for experiment in all_experiments
]
exp_frame = pd.DataFrame(exp_data, columns=['Name', 'Artifacts', 'Status'])
print(exp_frame)

              Name                                          Artifacts  Status
0  DatasetAnalysis  file:///mnt/nfs/home/koshkinam/vector-delirium...  active
1          Default  file:///mnt/nfs/home/koshkinam/vector-delirium...  active
2  ModelComparison  file:///mnt/nfs/home/koshkinam/vector-delirium...  active


In [5]:
# Model training runs (experiment id '0'): tabulate up to 100 runs, each with its
# start time plus the selected parameters and metrics
runs = mlflow.list_run_infos('0', max_results=100)
data = []
for run_info in runs:
    logged = mlflow.get_run(run_info.run_id).to_dictionary()['data']
    data.append([
        run_info.start_time,
        *get_parameters_list(logged),
        *get_metrics_list(logged),
    ])
frame = pd.DataFrame(
    data,
    columns=['Start time', 'Model', 'Dataset', 'Accuracy', 'F1 Score'],
)
print('------------------- Model Training Runs ----------------------')
print(frame)

------------------- Model Training Runs ----------------------
       Start time Model   Dataset  Accuracy  F1 Score
0   1635434580535   mlp    gemini         -         -
1   1634918781255   mlp    gemini   0.67013  0.282486
2   1634915740928   mlp    gemini  0.728733  0.350343
3   1634915064631   mlp    gemini  0.728415  0.349581
4   1634914831011   mlp    gemini  0.728097   0.35129
5   1634914772549   mlp    gemini         -         -
6   1634850932565   mlp    gemini   0.69493       0.0
7   1634850843801   mlp    gemini  0.667506       0.0
8   1634850407660   mlp    gemini         -         -
9   1634849799501   mlp    gemini         -         -
10  1634849591931   mlp    gemini         -         -
11  1634849438673   mlp    gemini         -         -
12  1634849231167   mlp    gemini         -         -
13  1634848865744   mlp    gemini         -         -
14  1634848406375   mlp    gemini         -         -
15  1634848303526   mlp    gemini         -         -
16  1634848185530  

In [11]:
# Display dataset drift analysis runs

import os
import json

def get_dataset_metrics_list(data):
    """Return ``[drift, n_features, n_drifted_features]`` for a dataset-analysis run.

    Parameters
    ----------
    data : dict
        Run data mapping; its optional ``'metrics'`` entry maps metric names
        to values.

    Returns
    -------
    list
        Three items. ``drift`` is ``'No'`` when the ``dataset_drift`` metric
        equals 0 and ``'Yes'`` for any other logged value. Every item falls
        back to the placeholder ``'-'`` when the run has no metrics at all or
        did not log that particular metric.
    """
    metrics = data.get('metrics')
    if not metrics:
        return ['-', '-', '-']
    # A run may have logged only some metrics; fall back per metric instead of
    # raising KeyError so one incomplete run does not abort the whole table.
    if 'dataset_drift' in metrics:
        drift = 'No' if metrics['dataset_drift'] == 0 else 'Yes'
    else:
        drift = '-'
    return [
        drift,
        metrics.get('n_features', '-'),
        metrics.get('n_drifted_features', '-'),
    ]

# Tabulate up to 100 DatasetAnalysis runs: the slice settings stored in each run's
# config.json artifact, followed by the drift metrics logged for that run.
exp = mlflow.get_experiment_by_name('DatasetAnalysis')
runs = mlflow.list_run_infos(exp.experiment_id, max_results=100)
table = []
for r in runs:
    exp_run = mlflow.get_run(r.run_id).to_dictionary()
    # Parse the artifact URI instead of slicing off a fixed six-character prefix:
    # a "file:" URI is converted to a local path, anything else is used unchanged.
    artifact_uri = exp_run['info']['artifact_uri']
    parsed_uri = urlparse(artifact_uri)
    path = url2pathname(parsed_uri.path) if parsed_uri.scheme == 'file' else artifact_uri
    config_file = os.path.join(path, 'config.json')
    if not os.path.isfile(config_file):
        # Runs without a readable config.json are left out of the table.
        continue
    with open(config_file) as f:
        data = json.load(f)
    row = [data['input'], data['slice'], data['data_ref'], data['data_eval']]
    row = row + get_dataset_metrics_list(exp_run['data'])
    table.append(row)
frame = pd.DataFrame(table, columns=['Input', 'Slice', 'Ref Slice', 'Eval Slice', 'Drift', 'Feat', 'Drift_Feat'])
print('------------------- Dataset Analysis ----------------------')
print(frame)

------------------- Dataset Analysis ----------------------
                Input Slice Ref Slice Eval Slice Drift  Feat Drift_Feat
0  ../gemini_data.csv  year    [2015]     [2016]    No  29.0        3.0
1  ../gemini_data.csv  year    [2015]     [2016]     -     -          -
2  ../gemini_data.csv  year    [2015]     [2016]     -     -          -
3  ../gemini_data.csv  year    [2015]     [2016]     -     -          -
4  ../gemini_data.csv  year    [2015]     [2016]     -     -          -
5  ../gemini_data.csv  year    [2015]     [2016]     -     -          -
6  ../gemini_data.csv  year    [2015]     [2016]     -     -          -
7  ../gemini_data.csv  year    [2015]     [2016]     -     -          -
8  ../gemini_data.csv  year    [2015]     [2016]     -     -          -
9  ../gemini_data.csv  year    [2015]     [2016]     -     -          -


In [None]:
# Display model comparison runs
#
# NOTE(review): only the two config values below are collected per run, so the frame
# declares exactly two columns (declaring more columns than each row holds makes
# pd.DataFrame raise ValueError). Accuracy / F1 columns for the reference and
# evaluation data still need the run's logged metrics appended to each row —
# TODO confirm the metric names logged by the ModelComparison experiment.

exp = mlflow.get_experiment_by_name('ModelComparison')
runs = mlflow.list_run_infos(exp.experiment_id, max_results=100)
table = []
for r in runs:
    exp_run = mlflow.get_run(r.run_id).to_dictionary()
    # Parse the artifact URI instead of slicing off a fixed six-character prefix:
    # a "file:" URI is converted to a local path, anything else is used unchanged.
    artifact_uri = exp_run['info']['artifact_uri']
    parsed_uri = urlparse(artifact_uri)
    path = url2pathname(parsed_uri.path) if parsed_uri.scheme == 'file' else artifact_uri
    config_file = os.path.join(path, 'config.json')
    if not os.path.isfile(config_file):
        # Runs without a readable config.json are left out of the table.
        continue
    with open(config_file) as f:
        data = json.load(f)
    # Prefer the correctly spelled key; fall back to the misspelled 'referece' so
    # config files written with that spelling still load.
    reference = data['reference'] if 'reference' in data else data['referece']
    row = [reference, data['test']]
    table.append(row)
frame = pd.DataFrame(table, columns=['Reference', 'Eval'])
print('------------------- Model Performance Comparison  ----------------------')
print(frame)