In [1]:
import os
import shutil
import tempfile

import matplotlib.pyplot as plt
import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [2]:
# Start from a clean slate: remove the local mlruns directory left by earlier runs
mlruns_dir = './mlruns'
try:
    shutil.rmtree(mlruns_dir)
except FileNotFoundError:
    # Nothing to delete (e.g. on a first run) -- report it and carry on
    print("WARNING: Can't find folder mlruns")

In [3]:
# Load the Boston housing features and target as arrays.
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn release.
data, target = load_boston(return_X_y=True)

# Wrap the feature matrix in a DataFrame
df = pd.DataFrame(data=data)

# Hold out a test split; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(df, target, random_state=0)

In [4]:
def calculate_errors(y,ypred):
    """Compute regression error metrics for a set of predictions.

    Parameters
    ----------
    y : array-like
        True target values.
    ypred : array-like
        Predicted values, aligned with ``y``.

    Returns
    -------
    tuple
        ``(mae, mse, R2, mape)`` where ``mape`` is expressed in percent.

    Notes
    -----
    MAPE divides by ``y``, so a zero in ``y`` makes it ``inf`` or ``nan``.
    """
    # Evaluate the three scikit-learn metrics in a fixed order
    scorers = (mean_absolute_error, mean_squared_error, r2_score)
    mae, mse, R2 = (scorer(y, ypred) for scorer in scorers)

    # MAPE is computed by hand on array copies of the inputs
    actual = np.array(y)
    predicted = np.array(ypred)
    relative_error = np.abs((actual - predicted) / actual)
    mape = np.mean(relative_error) * 100

    return mae, mse, R2, mape

## Train model

In [5]:
def train_elastic_net(a,l,exp = None):
    """Train an ElasticNet model and record the run in MLflow.

    Fits on the notebook-level ``X_train``/``y_train`` split, evaluates on
    ``X_test``/``y_test``, prints MAE/MSE/R2, and logs the parameters, the
    metrics, the fitted model and a bar chart of the metrics to a new run.

    Parameters
    ----------
    a : float
        Value passed to ``ElasticNet`` as ``alpha``.
    l : float
        Value passed to ``ElasticNet`` as ``l1_ratio``.
    exp : optional
        Experiment id forwarded to ``mlflow.start_run``; ``None`` leaves the
        choice of experiment to MLflow.
    """
    with mlflow.start_run(experiment_id=exp): #start mlflow run
        en = ElasticNet(alpha=a,l1_ratio=l)
        en.fit(X_train,y_train)
        y_pred = en.predict(X_test)

        #calculate errors
        mae,mse,R2,mape = calculate_errors(y_test,y_pred)
        errors = mae,mse,R2,mape
        print("MAE:{0:.3f}, MSE:{1:.2f}, R2:{2:.2f}".format(mae, mse, R2))

        #log metrics and parameters
        mlflow.log_metrics({"MAE":mae,"MSE":mse, "R2":R2, "MAPE":mape})
        mlflow.log_params({"alpha":a,"l1_ratio":l})

        #log the fitted model as a run artifact
        mlflow.sklearn.log_model(en, "model")

        #save error plot
        fig, ax = plt.subplots()
        try:
            ax.bar(['mae','mse','R2','mape'],errors,color=['blue','red','green','orange'])
            ax.set_title("Errors")
            # Write the image to a temporary directory so nothing is left
            # behind (or overwritten) in the working directory; the artifact
            # keeps the file name "errors.png".
            with tempfile.TemporaryDirectory() as tmp_dir:
                plot_path = os.path.join(tmp_dir, "errors.png")
                fig.savefig(plot_path)
                mlflow.log_artifact(plot_path)
        finally:
            # Always release the figure, even if plotting or logging fails
            plt.close(fig)

## Train and evaluate models with different alpha and l1 ratio values

In [6]:
 # Sweep every combination of alpha and l1_ratio in {0.1, 0.35, 0.6, 0.85};
 # each call logs one run to MLflow
 grid_values = np.arange(0.1, 1, 0.25)
 for alpha in grid_values:
     for l1_ratio in grid_values:
         train_elastic_net(alpha, l1_ratio)

MAE:3.787, MSE:32.25, R2:0.61
MAE:3.776, MSE:32.21, R2:0.61
MAE:3.764, MSE:32.16, R2:0.61
MAE:3.749, MSE:32.11, R2:0.61
MAE:3.882, MSE:33.66, R2:0.59
MAE:3.866, MSE:33.58, R2:0.59
MAE:3.845, MSE:33.41, R2:0.59
MAE:3.810, MSE:33.10, R2:0.59
MAE:3.936, MSE:34.57, R2:0.58
MAE:3.929, MSE:34.60, R2:0.58
MAE:3.914, MSE:34.49, R2:0.58
MAE:3.887, MSE:34.24, R2:0.58
MAE:3.966, MSE:35.23, R2:0.57
MAE:3.968, MSE:35.38, R2:0.57
MAE:3.970, MSE:35.47, R2:0.57
MAE:3.968, MSE:35.53, R2:0.57


In [7]:
# Summary statistics of the target, to put the error magnitudes in context
target_mean = target.mean()
target_std = target.std()
print("target mean:{}".format(target_mean))
print("target std:{}".format(target_std))

target mean:22.532806324110677
target std:9.188011545278203


## New experiment

In [8]:
# Create a separate experiment for the runs on normalized data;
# the returned id is passed to train_elastic_net as `exp`
experiment_name = "normalized data"
exp = mlflow.create_experiment(name=experiment_name)

In [9]:
from sklearn import preprocessing

In [10]:
# Rescale the features, then rebuild the DataFrame and the train/test split.
# NOTE(review): with these (default) settings normalize() scales each row
# (sample) to unit L2 norm rather than scaling each feature column --
# confirm that this is the intended preprocessing.
data = preprocessing.normalize(data, norm='l2', axis=1)
df = pd.DataFrame(data=data)

# train_elastic_net reads these notebook-level names, so every later run
# is trained and evaluated on the normalized split
X_train, X_test, y_train, y_test = train_test_split(df, target, random_state=0)

In [11]:
# Display the dimensions of the normalized feature array
data.shape

(506, 13)

In [12]:
 # Repeat the alpha / l1_ratio sweep, logging the runs under the experiment
 # whose id is stored in `exp`
 grid_values = np.arange(0.1, 1, 0.25)
 for alpha in grid_values:
     for l1_ratio in grid_values:
         train_elastic_net(alpha, l1_ratio, exp)

MAE:5.762, MSE:72.00, R2:0.12
MAE:5.731, MSE:71.32, R2:0.13
MAE:5.698, MSE:70.70, R2:0.13
MAE:5.723, MSE:70.57, R2:0.14
MAE:6.041, MSE:76.93, R2:0.06
MAE:6.015, MSE:76.32, R2:0.07
MAE:5.959, MSE:75.21, R2:0.08
MAE:5.821, MSE:72.91, R2:0.11
MAE:6.121, MSE:78.68, R2:0.04
MAE:6.111, MSE:78.46, R2:0.04
MAE:6.089, MSE:77.95, R2:0.05
MAE:6.026, MSE:76.54, R2:0.06
MAE:6.163, MSE:79.55, R2:0.03
MAE:6.165, MSE:79.58, R2:0.03
MAE:6.166, MSE:79.59, R2:0.03
MAE:6.166, MSE:79.59, R2:0.03


## We can serve the model as a REST API 
`mlflow models serve -m [model_path] -p [port]`

## And then call the API to make a prediction for us 


In [13]:
# First we need to turn one of the test rows into JSON format.
# 329 is an index label, so it must be present in the test split.
sample_row = X_test.loc[[329]]
sample_row.to_json(orient='split')

'{"columns":[0,1,2,3,4,5,6,7,8,9,10,11,12],"index":[329],"data":[[0.0001176914,0.0,0.0056710311,0.0,0.0008051464,0.0110847655,0.0301054738,0.0091272095,0.007001273,0.7526368457,0.0295803784,0.656736909,0.0128473359]]}'

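And then we are ready to call the API service and get a reply. Note that the example payload below contains the raw (un-normalized) feature values of the row; send values that were preprocessed the same way as the training data of the model being served (for a model from the "normalized data" experiment, use the normalized row produced above).  
`curl http://127.0.0.1:2323/invocations -H 'Content-Type: application/json' -d '{"columns":[0,1,2,3,4,5,6,7,8,9,10,11,12],"data":[[0.06724,0.0,3.24,0.0,0.46,6.333,17.2,5.2146,4.0,430.0,16.9,375.21,7.34]]}'`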

## We can also use MLflow to make predictions from a CSV or JSON file
First, we need to save the data we want predictions for in CSV format.


In [14]:
# Write the test features to data.csv (column header kept, index dropped)
X_test.to_csv(r'data.csv', header=True, index=False)

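`mlflow models predict -m mlruns/1/281e22a41418413c8e0ca608aeb78289/artifacts/model/ -i data.csv  -t csv`

Replace `281e22a41418413c8e0ca608aeb78289` with the ID of one of your own runs: the `mlruns` directory is deleted and recreated every time this notebook is run, so the run IDs change.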