# MLflow Wrapper Experiment Model Train Demo Notebook


In [None]:
import pandas as pd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

In [None]:
from mlflow_utility import experiment, run

## Set a New Experiment
An experiment can be thought of as a set of runs.

In [None]:
# Create the Experiment wrapper named "model_train"; later cells start and submit runs through it.
exp = experiment.Experiment(experiment_name="model_train")

Since one experiment can have multiple runs, we can create a new run:

In [None]:
# Start a named run under the experiment.
# NOTE: this rebinds `run`, which was previously the module imported from
# mlflow_utility; from here on `run` refers to the object returned below.
run = exp.start_logging(run_name='model_train_run_1')

Let's load the data, train a model, and log the results to the run

In [None]:
import pandas as pd
import numpy as np
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

# Load the diabetes dataset
print("Loading Data...")
diabetes = pd.read_csv('data/diabetes.csv')

# Separate features and labels
feature_columns = [
    'Pregnancies', 'PlasmaGlucose',
    'DiastolicBloodPressure', 'TricepsThickness',
    'SerumInsulin', 'BMI',
    'DiabetesPedigree', 'Age',
]
X = diabetes[feature_columns].values
y = diabetes['Diabetic'].values

# Split data into training set and test set (70/30, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)

# Set regularization hyperparameter
reg = 0.01

# Train a logistic regression model
print('Training a logistic regression model with regularization rate of', reg)
#run.mlflow.log_metric('Regularization Rate', float(reg))

# scikit-learn's C is the inverse of the regularization strength, hence 1/reg
model = LogisticRegression(C=1/reg, solver="liblinear").fit(X_train, y_train)

# Calculate accuracy as the fraction of test predictions that match the labels
y_hat = model.predict(X_test)
acc = np.average(y_hat == y_test)
print('Accuracy:', acc)
#run.mlflow.log_metric('Accuracy', float(acc))

# Calculate AUC from the predicted probability of the positive class (column 1)
y_scores = model.predict_proba(X_test)
auc = roc_auc_score(y_test, y_scores[:, 1])
print(f'AUC: {auc}')
# Use the builtin float: the np.float alias was deprecated in NumPy 1.20 and
# removed in 1.24, so np.float(auc) raises AttributeError on current NumPy.
run.mlflow.log_metric('AUC', float(auc))

# Log the trained model object against the run under the name 'diabetes_model'
run.log_object(model, name='diabetes_model')

# Close the run now that everything has been logged
run.end_run()

In [None]:
# Display the metrics most recently logged for `run` (shown as the cell output).
run.get_latest_logged_metrics()

In [None]:
# Display the parameters most recently logged for `run` (shown as the cell output).
run.get_latest_logged_parameters()

In [None]:
# Display the artefacts most recently logged for `run`.
# return_path=True presumably asks for artefact paths to be returned —
# TODO confirm against mlflow_utility.
run.get_latest_logged_artefacts(return_path=True)

## Running from a Script

In [None]:
# Submit model_train_script.py through the experiment, passing the
# regularization rate to the script as the --reg_rate parameter.
exp.submit_run(
    file='model_train_script.py',
    parameters={'--reg_rate': 0.764},
)

In [None]:
# Display the latest logged metrics.
# NOTE(review): `run` is still the object created by exp.start_logging earlier
# in the notebook; whether it reflects the run submitted via exp.submit_run
# cannot be determined from this notebook — confirm against mlflow_utility.
run.get_latest_logged_metrics()

In [None]:
# Display the latest logged parameters.
# NOTE(review): `run` is still the object created by exp.start_logging earlier
# in the notebook; whether it reflects the run submitted via exp.submit_run
# cannot be determined from this notebook — confirm against mlflow_utility.
run.get_latest_logged_parameters()

In [None]:
# Display the latest logged artefacts (called here without return_path).
# NOTE(review): `run` is still the object created by exp.start_logging earlier
# in the notebook; whether it reflects the run submitted via exp.submit_run
# cannot be determined from this notebook — confirm against mlflow_utility.
run.get_latest_logged_artefacts()