In [2]:
import os
import tempfile

import pandas as pd
import xgboost as xgb

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss, confusion_matrix

import matplotlib.pyplot as plt
import seaborn as sns

import mlflow
from mlflow import log_metric, log_param, log_params, log_artifacts, log_artifact

In [16]:
def main(split_random_state=0, model_random_state=0):
    """Train an XGBoost classifier on iris and record the run in MLflow.

    The iris data is split 80/20 into train and test sets, a
    'multi:softprob' XGBoost model is trained on the train set, and the
    run is recorded under the 'random_exp' experiment with:

    * params: 'split_random_state', 'model_random_state', 'model_params'
    * metrics: 'log_loss' and 'accuracy', both computed on the test set
    * artifact: 'confusion_matrix.png', a heatmap of the test-set
      confusion matrix

    Args:
        split_random_state: Passed to ``train_test_split`` as ``random_state``.
        model_random_state: Passed to XGBoost as the ``seed`` parameter.

    Returns:
        None. All results are written to MLflow.
    """
    mlflow.set_experiment('random_exp')

    # prepare train and test data
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target
    class_names = iris.target_names
    # The metrics below are given the full label set explicitly. Otherwise
    # they infer it from the data, and a split that leaves a class out of
    # the test set would no longer line up with the probability columns or
    # with the class_names used to label the confusion matrix.
    class_labels = list(range(len(class_names)))
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=split_random_state)

    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)

    with mlflow.start_run(nested=True):
        # train model
        params = {
            'objective': 'multi:softprob',
            'num_class': len(class_names),
            'eval_metric': 'mlogloss',
            'colsample_bytree': 0.8,
            'subsample': 0.9,
            'seed': model_random_state,
        }

        # log params received
        log_param('split_random_state', split_random_state)
        log_param('model_random_state', model_random_state)
        log_param('model_params', params)

        model = xgb.train(params, dtrain, evals=[(dtrain, 'train')], verbose_eval=False)

        # evaluate model: the predicted class is the most probable column
        y_proba = model.predict(dtest)
        y_pred = y_proba.argmax(axis=1)
        loss = log_loss(y_test, y_proba, labels=class_labels)
        acc = accuracy_score(y_test, y_pred)

        # log metrics
        mlflow.log_metrics({'log_loss': loss, 'accuracy': acc})

        # plot confusion matrix
        cm = confusion_matrix(y_test, y_pred, labels=class_labels)
        cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)

        fig, ax = plt.subplots(figsize=(5.5, 4))
        try:
            sns.heatmap(cm_df, annot=True, ax=ax)
            ax.set_title('XGBoost split:{0} model:{1} \nAccuracy:{2:.3f}'.format(
                split_random_state, model_random_state, acc))
            ax.set_ylabel('True label')
            ax.set_xlabel('Predicted label')

            # Save into a throwaway directory so repeated calls do not leave
            # or overwrite a file in the working directory. The file name is
            # kept so the artifact is still called confusion_matrix.png.
            with tempfile.TemporaryDirectory() as tmp_dir:
                plot_path = os.path.join(tmp_dir, "confusion_matrix.png")
                fig.savefig(plot_path)
                log_artifact(plot_path)
        finally:
            # Close this figure even if plotting or logging fails; main is
            # called in a loop and open figures would pile up.
            plt.close(fig)

In [17]:
# Sweep the train/test split seed while the model seed stays at main's
# default of 0, then sweep the model seed with the split seed fixed at 0.
# Built-in range is used because numpy is not imported in this notebook;
# it yields the same values 0..99 as plain Python ints.
for split_random_state in range(100):
    main(split_random_state)
for model_random_state in range(100):
    main(0, model_random_state)

INFO: 'random_exp' does not exist. Creating a new experiment
