In [12]:
import mlflow
import mlflow.sklearn
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment(experiment_name='XP_test')
tags = {
        "Projeto": "Teste Tecnico XP",
        "team": "MLops",
        "dataset": "Wine"
       }

In [13]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split


wine_data = load_wine()
X = wine_data.data
y = wine_data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)



In [14]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import seaborn as sns

def metricas(y_test, y_predict):
    acuracia = accuracy_score(y_test, y_predict)
    precision = precision_score(y_test, y_predict,average='weighted')
    recall = recall_score(y_test, y_predict,average='weighted')
    f1 = f1_score(y_test, y_predict,average='weighted')
    return acuracia, precision, recall, f1

def matriz_confusao(y_test, y_predict):
    matriz_conf = confusion_matrix(y_test, y_predict)
    fig = plt.figure()
    ax = plt.subplot()
    sns.heatmap(matriz_conf, annot=True, cmap='Blues', ax=ax);

    ax.set_xlabel('Valor Predito');
    ax.set_ylabel('Valor Real'); 
    ax.set_title('Matriz de Confusão'); 
    ax.xaxis.set_ticklabels(['Classe 1', 'Classe 2', 'Classe 3']);
    ax.yaxis.set_ticklabels(['Classe 1', 'Classe 2', 'Classe 3']);
    plt.close()
    return fig


In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
import os



with mlflow.start_run(run_name='random_forest'):
    rf_model = RandomForestClassifier(random_state=42)

    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10]
    }

    grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)

    grid_search.fit(X_train, y_train)

    print("Melhores Parâmetros:", grid_search.best_params_)

    best_rf_model = grid_search.best_estimator_
    y_pred = best_rf_model.predict(X_test)

    acuracia, precision, recall, f1 = metricas(y_test, y_pred)
    matriz_conf = matriz_confusao(y_test, y_pred)

    temp_name = "confusion-matrix.png"
    matriz_conf.savefig(temp_name)

    mlflow.log_artifact(temp_name, "confusion-matrix-plots")

    try:
        os.remove(temp_name)
    except FileNotFoundError as e:
        print(f"{temp_name} file is not found")

    mlflow.log_param("n_estimators", grid_search.best_params_["n_estimators"])
    mlflow.log_param("max_depth", grid_search.best_params_["max_depth"])
    mlflow.log_param("min_samples_split", grid_search.best_params_["min_samples_split"])
    mlflow.log_metric("Acuracia", acuracia)
    mlflow.log_metric("Precision", precision)
    mlflow.log_metric("Recall", recall)
    mlflow.log_metric("F1-Score", f1)

    mlflow.sklearn.log_model(sk_model=best_rf_model,
        artifact_path="wine-model",
        registered_model_name="xp-wine-random-forest-gridsearch")
    
    # Registrar o modelo no Model Registry
    model_uri = "runs:/{}/wine-model".format(mlflow.active_run().info.run_id)
    result = mlflow.register_model(model_uri=model_uri, name="xp-wine-random-forest-gridsearch")

    print("Modelo registrado com sucesso:", result)


Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] END max_depth=None, min_samples_split=2, n_estimators=50; total time=   0.1s
[CV] END max_depth=None, min_samples_split=2, n_estimators=50; total time=   0.1s
[CV] END max_depth=None, min_samples_split=2, n_estimators=50; total time=   0.1s
[CV] END max_depth=None, min_samples_split=5, n_estimators=50; total time=   0.1s
[CV] END max_depth=None, min_samples_split=5, n_estimators=50; total time=   0.1s
[CV] END max_depth=None, min_samples_split=2, n_estimators=50; total time=   0.1s
[CV] END max_depth=None, min_samples_split=2, n_estimators=50; total time=   0.1s
[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time=   0.1s
[CV] END max_depth=None, min_samples_split=5, n_estimators=50; total time=   0.1s
[CV] END max_depth=None, min_samples_split=5, n_estimators=50; total time=   0.1s
[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time=   0.1s
[CV] END max_depth=None, min_sampl

Registered model 'xp-wine-random-forest-gridsearch' already exists. Creating a new version of this model...
2024/09/22 15:44:17 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: xp-wine-random-forest-gridsearch, version 10
Created version '10' of model 'xp-wine-random-forest-gridsearch'.
Registered model 'xp-wine-random-forest-gridsearch' already exists. Creating a new version of this model...
2024/09/22 15:44:17 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: xp-wine-random-forest-gridsearch, version 11
Created version '11' of model 'xp-wine-random-forest-gridsearch'.
2024/09/22 15:44:17 INFO mlflow.tracking._tracking_service.client: 🏃 View run random_forest at: http://127.0.0.1:5000/#/experiments/526684593343465379/runs/8034eb4607f34557a0e0be8cb1bb5f66.
2024/09/22 15:44:17 INFO mlflow.tracking._tracking_service.client: 🧪 View experime

Modelo registrado com sucesso: <ModelVersion: aliases=[], creation_timestamp=1727030657040, current_stage='None', description='', last_updated_timestamp=1727030657040, name='xp-wine-random-forest-gridsearch', run_id='8034eb4607f34557a0e0be8cb1bb5f66', run_link='', source='mlflow-artifacts:/526684593343465379/8034eb4607f34557a0e0be8cb1bb5f66/artifacts/wine-model', status='READY', status_message='', tags={}, user_id='', version='11'>
