#### Importando bibliotecas

In [None]:
# manipulação de dados
import pandas as pd
import numpy as np
# machine learning
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn import metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# manipulação de imagens
import matplotlib
import matplotlib.pyplot as plt
# gerencialmento do MLFlow
import mlflow
import mlflow.sklearn

#### Lendo dados
##### Neste tutorial será utilizado o conjunto de dados IRIS, comumente utilizado para testar algoritmos de classificação <https://archive.ics.uci.edu/ml/datasets/iris>. Este dataset contém 3 classes de iris (gênero de planta), cada qual com 50 instâncias.

In [None]:
iris_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
iris_df = pd.read_csv(iris_url, header=None).set_axis(names, axis=1)
iris_df.head()

#### Separação em conjuntos de treinamento e test
##### O dataset foi dividido na proporção 70%/30%, respectivamente para os conjunto de validação e teste.

In [None]:
train, test = train_test_split(iris_df, test_size = 0.3, stratify = iris_df["species"], random_state = 42)
X_train = train[['sepal_length','sepal_width','petal_length','petal_width']]
y_train = train.species
X_test = test[['sepal_length','sepal_width','petal_length','petal_width']]
y_test = test.species

#### Criação do ambiente e gerenciamento dos experimentos no MLflow

In [None]:
#mlflow.create_experiment("Classification-iris")

In [128]:
mlflow.set_experiment('Classification-iris') 
with mlflow.start_run():
    #lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    #lr.fit(train_x, train_y)
    max_depth = 5
    criterion = 'entropy' #“entropy”, “log_loss”
    splitter = 'ramdom' #ramdom
    dt_model = DecisionTreeClassifier(max_depth = max_depth, criterion = criterion
                                      , splitter='best', random_state = 42)
    dt_model.fit(X_train, y_train)
   
    fn = ["sepal_length", "sepal_width", "petal_length", "petal_width"]
    cn = ['setosa', 'versicolor', 'virginica']
    
    plt.figure(figsize = (12, 10))
    plot_tree(mod_dt, feature_names = fn, class_names = cn, filled = True)
    display()
    plt_ = 'dt_figure.png' 
    plt.savefig(plt_)
    mlflow.log_artifact(plt_)
    plt.close()
    
    plt.figure(figsize = (12, 10))
    mc = metrics.plot_confusion_matrix(mod_dt, X_test, y_test,
                                 display_labels=cn,
                                 cmap=plt.cm.Blues,
                                 normalize=None)
    display()
    plt_ = 'dt_confusion_matrix.png' 
    plt.savefig(plt_)
    mlflow.log_artifact(plt_)
    plt.close()
   
    #predicted_qualities = lr.predict(test_x)
    prediction = dt_model.predict(X_test)
    
    acc = metrics.accuracy_score(prediction, y_test)
    f1 = metrics.f1_score(prediction, y_test, average='weighted')
    precision = metrics.precision_score(prediction, y_test, average='weighted')

    print(f"DT model (max_depth={5}):")
    print(f"Accuracy: {round(acc,3)}")
    print(f"F1: {round(f1,3)}")
    print(f"Precision: {round(precision,3)}")

    mlflow.log_param("max_depth", max_depth)
    mlflow.log_param("criterion", criterion)
    mlflow.log_param("splitter", splitter)
    mlflow.log_metric("acc", acc)
    mlflow.log_metric("f1", f1)
    mlflow.log_metric("precision", precision)

    mlflow.sklearn.log_model(dt_model, "model")



DT model (max_depth=5):
Accuracy: 0.889
F1: 0.889
Precision: 0.89


<Figure size 864x720 with 0 Axes>

#### Consumo do modelo previamente salvo

In [118]:
# Criação de um vetor com as informação a serem utilizadas para realizar
# uma nova previsão (cada valor se referente a um dos 5 atributos do dataset)
data_to_predict = np.array([5.1, 3.5, 1.4, 0.2])

In [129]:
# Log do modelo de interesse. Obs.: a lista contendo todos os modelos pode 
# ser vizualida a partir da interface do MLflow
logged_model = 'runs:/fac567bde1f043a4afff020bb304575f/model'

# Carregando o modelo de interesse.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Realização da previsão para uma nova instância.
import pandas as pd
data_predict = data_to_predict.reshape(1, -1)
loaded_model.predict(data_predict)



array(['Iris-setosa'], dtype=object)