## 27.1 Experiment Tracking with MLflow
Boise State University\
Department of Computer Science\
CS 334 Algorithms of Machine Learning\
Conrad Kennington\
Fall 2020

In [1]:
# import mlflow for experiment tracking -- installed with pip3
import mlflow
from mlflow import log_metric, log_param, log_artifacts
# Make "diabetes-decision-tree-exp" the active experiment; runs started below are recorded under it.
mlflow.set_experiment(experiment_name="diabetes-decision-tree-exp")

INFO: 'diabetes-decision-tree-exp' does not exist. Creating a new experiment


In [2]:
# Load libraries
import os
import pandas as pd
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation

### Import Diabetes Dataset Directly

In [3]:
# Names assigned to the CSV columns, in file order; 'label' is the target column.
col_names = [
    'pregnant', 'glucose', 'bp', 'skin', 'insulin',
    'bmi', 'pedigree', 'age', 'label',
]
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"

# The file is read as having no header row, so every line is data and the
# column names come from col_names.
pima = pd.read_csv(url, header=None, names=col_names)

# Model inputs: every column except 'skin' and the target.
feature_cols = ['pregnant', 'insulin', 'bmi', 'age', 'glucose', 'bp', 'pedigree']
X = pima[feature_cols]  # Features
y = pima['label']  # Target variable

### Split Data

In [4]:
# Hold out 30% of the rows as the test set (70% train); the fixed
# random_state makes the split repeatable across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

### Loop over 12 training iterations, varying the max depth of the tree (3 through 14)

In [5]:
# Imported once ahead of the loop instead of being re-executed on every iteration.
import matplotlib.pyplot as plt

# Sweep max_depth over 3..14 (12 values). Each depth trains one decision tree
# and is recorded as its own MLflow run with a parameter, two metrics, and a
# ROC-curve PDF artifact.
for x in range(3, 15):
    # Fixed random_state so a fresh kernel reproduces the same trees and metrics.
    clf = DecisionTreeClassifier(max_depth=x, random_state=1)
    clf = clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # ROC analysis needs a continuous score, not hard class labels: with 0/1
    # predictions the curve collapses to a single operating point and the AUC
    # is degenerate. Column 1 of predict_proba is the score for the second
    # class in clf.classes_ (assumes binary 0/1 labels with 1 as the positive class).
    y_score = clf.predict_proba(X_test)[:, 1]

    # track experiment
    with mlflow.start_run() as run:
        log_param("max_depth", x)
        log_metric("accuracy_score", metrics.accuracy_score(y_test, y_pred))

        # area under the ROC curve, computed from the class-1 scores
        fpr, tpr, threshold = metrics.roc_curve(y_test, y_score)
        roc_auc = metrics.auc(fpr, tpr)
        log_metric("roc_auc", roc_auc)

        # Draw on an explicit figure so it can be closed afterwards; clearing
        # the implicit pyplot figure instead leaves an empty figure behind.
        fig, ax = plt.subplots()
        try:
            ax.set_title('Receiver Operating Characteristic')
            ax.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
            ax.legend(loc='lower right')
            ax.plot([0, 1], [0, 1], 'r--')  # chance-level diagonal for reference
            ax.set_xlim([0, 1])
            ax.set_ylim([0, 1])
            ax.set_ylabel('True Positive Rate')
            ax.set_xlabel('False Positive Rate')

            # One PDF per depth, written to the working directory and then
            # attached to the current run as an artifact.
            filename = 'roc{}.pdf'.format(x)
            fig.savefig(filename)
            mlflow.log_artifact(filename)
        finally:
            # Release the figure even if saving or logging fails.
            plt.close(fig)

## View the logged runs in the MLflow UI: run `mlflow ui` in this directory, then open http://localhost:5000/#/

<Figure size 432x288 with 0 Axes>