In [1]:
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import numpy as np
import matplotlib.pyplot as plt
import mlflow
import mlflow.sklearn
import shutil  
import os

In [2]:
#Delete last runs directory
try:
    shutil.rmtree('./mlruns')
except FileNotFoundError:
    print("WARNING: Can't find folder mlruns")

In [3]:
data, target = load_boston(return_X_y=True)

df = pd.DataFrame(data)

X_train, X_test, y_train, y_test = train_test_split(df,target,random_state=0)

In [4]:
def calculate_errors(y,ypred):
    mae = mean_absolute_error(y,ypred)
    mse = mean_squared_error(y, ypred)
    R2 = r2_score(y,ypred)
    y,ypred = np.array(y), np.array(ypred)
    mape = np.mean(np.abs((y - ypred) / y)) * 100
#     print("MAE:{0:.3f}, MSE:{1:.2f}, R2:{2:.2f}".format(mae, mse, R2))
    return mae,mse,R2,mape

## Train model

In [5]:
def train_elastic_net(a,l,exp = None):
    with mlflow.start_run(experiment_id=exp): #start mlflow run
        en = ElasticNet(alpha=a,l1_ratio=l)
        en.fit(X_train,y_train)
        y_pred = en.predict(X_test)
        
        #calculate errors
        mae,mse,R2,mape = calculate_errors(y_test,y_pred)
        errors = mae,mse,R2,mape
        print("MAE:{0:.3f}, MSE:{1:.2f}, R2:{2:.2f}".format(mae, mse, R2))
        
        #log metris and parmeters
        mlflow.log_metrics({"MAE":mae,"MSE":mse, "R2":R2, "MAPE":mape})
        mlflow.log_params({"alpha":a,"l1_ratio":l})
        
        #register model
        mlflow.sklearn.log_model(en, "model")
        
        #save error plot
        plt.figure()
        plt.bar(['mae','mse','R2','mape'],errors,color=['blue','red','green','orange']);
        plt.title("Errors")
        plt.savefig("errors.png")
        plt.close()
        mlflow.log_artifact("errors.png")

## Predict with different alpha and l1 ratio values

In [6]:
 for a in np.arange(0.1, 1, 0.25):
        for l in np.arange(0.1, 1, 0.25):
            train_elastic_net(a,l)

MAE:3.787, MSE:32.25, R2:0.61
MAE:3.776, MSE:32.21, R2:0.61
MAE:3.764, MSE:32.16, R2:0.61
MAE:3.749, MSE:32.11, R2:0.61
MAE:3.882, MSE:33.66, R2:0.59
MAE:3.866, MSE:33.58, R2:0.59
MAE:3.845, MSE:33.41, R2:0.59
MAE:3.810, MSE:33.10, R2:0.59
MAE:3.936, MSE:34.57, R2:0.58
MAE:3.929, MSE:34.60, R2:0.58
MAE:3.914, MSE:34.49, R2:0.58
MAE:3.887, MSE:34.24, R2:0.58
MAE:3.966, MSE:35.23, R2:0.57
MAE:3.968, MSE:35.38, R2:0.57
MAE:3.970, MSE:35.47, R2:0.57
MAE:3.968, MSE:35.53, R2:0.57


In [7]:
print("target mean:{}".format(target.mean()))
print("target std:{}".format(target.std()))

target mean:22.532806324110677
target std:9.188011545278203


## new experiment

In [8]:
exp = mlflow.create_experiment(name="normalized data")

In [9]:
from sklearn import preprocessing

In [10]:
data = preprocessing.normalize(data)
df = pd.DataFrame(data)

X_train, X_test, y_train, y_test = train_test_split(df,target,random_state=0)

In [11]:
data.shape

(506, 13)

In [12]:
 for a in np.arange(0.1, 1, 0.25):
        for l in np.arange(0.1, 1, 0.25):
            train_elastic_net(a,l,exp)

MAE:5.762, MSE:72.00, R2:0.12
MAE:5.731, MSE:71.32, R2:0.13
MAE:5.698, MSE:70.70, R2:0.13
MAE:5.723, MSE:70.57, R2:0.14
MAE:6.041, MSE:76.93, R2:0.06
MAE:6.015, MSE:76.32, R2:0.07
MAE:5.959, MSE:75.21, R2:0.08
MAE:5.821, MSE:72.91, R2:0.11
MAE:6.121, MSE:78.68, R2:0.04
MAE:6.111, MSE:78.46, R2:0.04
MAE:6.089, MSE:77.95, R2:0.05
MAE:6.026, MSE:76.54, R2:0.06
MAE:6.163, MSE:79.55, R2:0.03
MAE:6.165, MSE:79.58, R2:0.03
MAE:6.166, MSE:79.59, R2:0.03
MAE:6.166, MSE:79.59, R2:0.03


# Find best model

In [13]:
df = mlflow.search_runs()
run_id = df.loc[df['metrics.MAE'].idxmin()]['run_id']
print("Minimum error run_id: ",run_id)

Minimum error run_id:  7d19abac51d44de984b4c1acdd44c4bc


In [14]:
df

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.R2,metrics.MAPE,metrics.MSE,metrics.MAE,params.l1_ratio,params.alpha,tags.mlflow.source.type,tags.mlflow.user,tags.mlflow.log-model.history,tags.mlflow.source.name
0,51935af4cbc84393ae8c91fb1c0d17c0,0,FINISHED,file:///C:/ie/session2/mlruns/0/51935af4cbc843...,2021-05-30 18:57:27.606000+00:00,2021-05-30 18:57:28.048000+00:00,0.565145,18.322409,35.527237,3.968131,0.8499999999999999,0.8499999999999999,LOCAL,jicav,"[{""run_id"": ""51935af4cbc84393ae8c91fb1c0d17c0""...",C:\Users\jicav\anaconda3\lib\site-packages\ipy...
1,4ed97f63f3784562b4e02c4757b2b1f1,0,FINISHED,file:///C:/ie/session2/mlruns/0/4ed97f63f37845...,2021-05-30 18:57:26.914000+00:00,2021-05-30 18:57:27.597000+00:00,0.565791,18.34426,35.474462,3.969933,0.6,0.8499999999999999,LOCAL,jicav,"[{""run_id"": ""4ed97f63f3784562b4e02c4757b2b1f1""...",C:\Users\jicav\anaconda3\lib\site-packages\ipy...
2,41dcc790417340c4a3f84d2588d04abe,0,FINISHED,file:///C:/ie/session2/mlruns/0/41dcc790417340...,2021-05-30 18:57:26.426000+00:00,2021-05-30 18:57:26.908000+00:00,0.566991,18.35308,35.376397,3.968318,0.35,0.8499999999999999,LOCAL,jicav,"[{""run_id"": ""41dcc790417340c4a3f84d2588d04abe""...",C:\Users\jicav\anaconda3\lib\site-packages\ipy...
3,e8d87e97f1084b75a181934271093872,0,FINISHED,file:///C:/ie/session2/mlruns/0/e8d87e97f1084b...,2021-05-30 18:57:25.869000+00:00,2021-05-30 18:57:26.415000+00:00,0.568795,18.365372,35.229018,3.966483,0.1,0.8499999999999999,LOCAL,jicav,"[{""run_id"": ""e8d87e97f1084b75a181934271093872""...",C:\Users\jicav\anaconda3\lib\site-packages\ipy...
4,8a60d47c05244287beb67a0c984b7c87,0,FINISHED,file:///C:/ie/session2/mlruns/0/8a60d47c052442...,2021-05-30 18:57:25.321000+00:00,2021-05-30 18:57:25.861000+00:00,0.580933,18.02963,34.237367,3.886942,0.8499999999999999,0.6,LOCAL,jicav,"[{""run_id"": ""8a60d47c05244287beb67a0c984b7c87""...",C:\Users\jicav\anaconda3\lib\site-packages\ipy...
5,7477c21f92674890b348b78222b662c5,0,FINISHED,file:///C:/ie/session2/mlruns/0/7477c21f926748...,2021-05-30 18:57:24.806000+00:00,2021-05-30 18:57:25.307000+00:00,0.577847,18.155179,34.489518,3.914044,0.6,0.6,LOCAL,jicav,"[{""run_id"": ""7477c21f92674890b348b78222b662c5""...",C:\Users\jicav\anaconda3\lib\site-packages\ipy...
6,81165c7a64874041a232d026e42429da,0,FINISHED,file:///C:/ie/session2/mlruns/0/81165c7a648740...,2021-05-30 18:57:24.211000+00:00,2021-05-30 18:57:24.797000+00:00,0.57653,18.228895,34.597062,3.928626,0.35,0.6,LOCAL,jicav,"[{""run_id"": ""81165c7a64874041a232d026e42429da""...",C:\Users\jicav\anaconda3\lib\site-packages\ipy...
7,062400f7da4c44d2941ca14471a5ddba,0,FINISHED,file:///C:/ie/session2/mlruns/0/062400f7da4c44...,2021-05-30 18:57:23.717000+00:00,2021-05-30 18:57:24.203000+00:00,0.576849,18.27414,34.57104,3.935648,0.1,0.6,LOCAL,jicav,"[{""run_id"": ""062400f7da4c44d2941ca14471a5ddba""...",C:\Users\jicav\anaconda3\lib\site-packages\ipy...
8,cc6e139bb15b427b82ef5dce2494d3ab,0,FINISHED,file:///C:/ie/session2/mlruns/0/cc6e139bb15b42...,2021-05-30 18:57:23.191000+00:00,2021-05-30 18:57:23.707000+00:00,0.594847,17.773695,33.100608,3.809643,0.8499999999999999,0.35,LOCAL,jicav,"[{""run_id"": ""cc6e139bb15b427b82ef5dce2494d3ab""...",C:\Users\jicav\anaconda3\lib\site-packages\ipy...
9,29626f43bf9d437091b8de365b965c27,0,FINISHED,file:///C:/ie/session2/mlruns/0/29626f43bf9d43...,2021-05-30 18:57:22.709000+00:00,2021-05-30 18:57:23.182000+00:00,0.591077,17.913881,33.408624,3.844676,0.6,0.35,LOCAL,jicav,"[{""run_id"": ""29626f43bf9d437091b8de365b965c27""...",C:\Users\jicav\anaconda3\lib\site-packages\ipy...


## We can serve the model as a REST API 
`mlflow models serve -m [model_path] -p [port]`

In [15]:
#mejor modelo
print("mlflow models serve -m ./mlruns/0/{0}/artifacts/model -p 5354".format(run_id))

mlflow models serve -m ./mlruns/0/7d19abac51d44de984b4c1acdd44c4bc/artifacts/model -p 5354


## And then call the API to make a prediction for us 


In [18]:
#first we need to turn one of the test rows into json format
X_test.loc[[329]].to_json(orient='split')

'{"columns":[0,1,2,3,4,5,6,7,8,9,10,11,12],"index":[329],"data":[[0.0001176914,0.0,0.0056710311,0.0,0.0008051464,0.0110847655,0.0301054738,0.0091272095,0.007001273,0.7526368457,0.0295803784,0.656736909,0.0128473359]]}'

And then we are ready to call the API service and get a reply  
`curl http://127.0.0.1:5354/invocations -H 'Content-Type: application/json' -d '{"columns":[0,1,2,3,4,5,6,7,8,9,10,11,12],"index":[329],"data":[[0.0001176914,0.0,0.0056710311,0.0,0.0008051464,0.0110847655,0.0301054738,0.0091272095,0.007001273,0.7526368457,0.0295803784,0.656736909,0.0128473359]]}'`

In [21]:
!curl -X POST -H "Content-Type:application/json; format=pandas-split" --data "{\"columns\":[\"0\", \"1\", \"2\", \"3\", \"4\", \"5\", \"6\", \"7\", \"8\", \"9\", \"10\", \"11\", \"12\"],\"data\":[[0.06724,0.0,3.24,0.0,0.46,6.333,17.2,5.2146,4.0,430.0,16.9,375.21,7.34]]}" http://127.0.0.1:5354/invocations

[24.896959855052206]


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed

  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100   182  100    20  100   162   3333  27000 --:--:-- --:--:-- --:--:-- 30333


#### to run a MLFlow project directly from GitHub
mlflow run git@github.com:mlflow/mlflow-example.git -P alpha=1