## Registar um pipeline no MLflow

In [1]:
import mlflow
import pandas as pd
import numpy as np

from sklearn.pipeline import Pipeline
from sklearn.metrics import  precision_recall_curve, roc_auc_score, confusion_matrix, accuracy_score, recall_score, precision_score, f1_score,auc, roc_curve
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn import tree
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier



In [2]:
# Notebook-wide settings, gathered in one place so they are easy to tune.
seed = 42  # shared random seed passed to the split and the estimators below
root_path = "../data/"  # folder (relative to the notebook) holding the input CSV
target_col = "default.payment.next.month"  # name of the label column

## Definir o URI do servidor de tracking onde as experiências são guardadas

In [3]:
from pathlib import Path

# HTTP address of the MLflow tracking server that will receive the runs
# logged by this notebook.
uri = "http://127.0.0.1:5000"
mlflow.set_tracking_uri(uri=uri)

## Definir a experiência ativa

In [4]:
# Select the experiment that subsequent runs are logged under; the captured
# output below shows MLflow creating it when it does not exist yet.
mlflow.set_experiment(experiment_name="Lending Prediction Experiment")

2025/03/25 23:07:53 INFO mlflow.tracking.fluent: Experiment with name 'Lending Prediction Experiment' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/448265258201631582', creation_time=1742944073896, experiment_id='448265258201631582', last_update_time=1742944073896, lifecycle_stage='active', name='Lending Prediction Experiment', tags={}>

## Criar os datasets


In [5]:
# Load the raw data and drop the 'ID' column so it is not used as a feature.
df = pd.read_csv(root_path + 'lending_data.csv').drop(columns='ID')

# Hold out 20% of the rows as the test set (seeded for reproducibility).
train_set, test_set = train_test_split(df, test_size=0.2, random_state=seed)

# Train on a 20% subsample of the training split (presumably to keep the
# grid searches fast -- TODO confirm). The sample is seeded so that
# Restart & Run All always yields the same training rows; without
# `random_state` every execution would train on different data.
train_set_staged = train_set.sample(frac=0.2, random_state=seed)

# Separate features from the target for both splits.
X_train = train_set_staged.drop(columns=[target_col])
y_train = train_set_staged[target_col]

X_test = test_set.drop(columns=[target_col])
y_test = test_set[target_col]

X_train.head()

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6
6932,500000.0,1,1,2,37,-1,-1,-1,-1,-1,...,30592.0,154167.0,13410.0,25426.0,60446.0,30594.0,150843.0,163881.0,25426.0,39526.0
6586,360000.0,1,3,1,42,-1,-1,-1,0,-1,...,22648.0,19433.0,16708.0,24047.0,1815.0,22674.0,95.0,16790.0,24283.0,17387.0
13780,20000.0,2,2,1,34,0,0,0,2,3,...,9618.0,10313.0,9863.0,9558.0,1200.0,2000.0,1000.0,0.0,0.0,508.0
4365,170000.0,1,1,1,44,-1,-1,-1,-1,-1,...,1716.0,2442.0,14605.0,2895.0,29069.0,1721.0,2442.0,14605.0,2895.0,9608.0
2301,230000.0,2,1,1,30,-1,-1,0,0,0,...,32450.0,17285.0,9766.0,9981.0,17402.0,20013.0,346.0,5000.0,8000.0,5000.0


In [6]:
# Candidate models and the hyper-parameter grid searched for each of them.
# Every entry is a tuple: (name used for the MLflow run and registered model,
# estimator or pipeline, parameter grid passed to GridSearchCV).
# Grid keys for pipelines are prefixed with the step name ('Classifier__').
models_and_params = [
    (
        'ann',
        Pipeline(steps=[
            ('scaler', MinMaxScaler()),
            ('Classifier', MLPClassifier(solver = 'lbfgs',  random_state = seed, max_iter = 1000))
        ]),
        {
          # `learning_rate_init` is intentionally not searched: scikit-learn
          # only uses it for solver='sgd' or 'adam', so with 'lbfgs' every
          # value produced the same fit and only multiplied the search cost.
          "Classifier__hidden_layer_sizes": [(20,), (20,10), (20, 10, 2)]
        }
    ),
    (
        'random_forest',
        RandomForestClassifier(random_state = seed,  class_weight = 'balanced'),
        {
            'n_estimators':[10, 100, 300, 1000]
        }
    ),
    (
        'decision_tree',
        tree.DecisionTreeClassifier(random_state = seed,  class_weight = 'balanced'),
        {
            'max_depth':[3, 6],
            'min_samples_split': [2, 4, 10]
        }
    ),
    (
        'svm',
        Pipeline(steps=[
            ('scaler', MinMaxScaler()),
            # probability=True enables predict_proba on the fitted SVC.
            ('Classifier', SVC(random_state = seed, class_weight = 'balanced', gamma = 'scale', probability = True, verbose = True))
        ]),
        {
          "Classifier__C": [0.1, 1, 10],
          "Classifier__kernel": ["linear","rbf"]
        }
    ),
    (
        'knn',
        Pipeline(steps=[
            ('scaler', MinMaxScaler()),
            ('Classifier', KNeighborsClassifier())
        ]),
        {
          "Classifier__n_neighbors": [1,10,100],
        }
    ),
    (
        'lr',
        Pipeline(steps=[
            ('scaler', MinMaxScaler()),
            ('Classifier', LogisticRegression(max_iter = 500, solver = 'lbfgs', random_state = seed, class_weight = 'balanced'))
        ]),
        {
          "Classifier__C": [0.001, 0.01, 0.1, 1, 10, 100]
        }
    )
]

In [7]:
# Names of the models whose run raised, so the final message can be honest.
failed_models = []

for model_str, clf, params in models_and_params:
    print(f"Currently assessing {model_str}")

    # Start one MLflow run per model. The status is assumed successful until
    # an exception proves otherwise, and is passed to end_run() below so that
    # a failed run is not recorded as FINISHED.
    run_status = "FINISHED"
    run = mlflow.start_run(run_name=model_str)

    try:
        # 1. Run the grid search (5-fold CV, all cores) on the training sample.
        grid_search = GridSearchCV(clf, params, cv=5, n_jobs=-1)
        grid_search.fit(X_train, y_train)

        # 2. Retrieve the winning parameters and their mean CV score.
        best_params = grid_search.best_params_
        best_score = grid_search.best_score_

        # 3. Log the hyper-parameters and the CV metric to MLflow.
        mlflow.log_params(best_params)
        mlflow.log_metric("best_cv_score", best_score)

        # 4. Log the refitted best estimator and register it under the model name.
        mlflow.sklearn.log_model(grid_search.best_estimator_, artifact_path=model_str, registered_model_name=model_str)

        # 5. Predict on the held-out test set.
        y_preds = grid_search.best_estimator_.predict(X_test)

        # 6. Log test-set accuracy as an additional metric.
        acc = accuracy_score(y_test, y_preds)
        mlflow.log_metric("accuracy", acc)

        print(f"✅ Model {model_str} completed with best score: {best_score:.4f}")

    except Exception as e:
        # Keep evaluating the remaining models (best effort), but remember
        # the failure and mark this run as failed instead of hiding it.
        run_status = "FAILED"
        failed_models.append(model_str)
        print(f"❌ Error in {model_str}: {e}")

    finally:
        # Always close the run, with the status that reflects what happened.
        mlflow.end_run(status=run_status)

if failed_models:
    print(f"⚠️ Finished with failed runs: {', '.join(failed_models)}")
else:
    print("🎯 All models have been evaluated and logged in MLflow!")

Currently assessing ann


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
Successfully registered model 'ann'.
2025/03/25 23:09:47 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ann, version 1
Created version '1' of model 'ann'.


✅ Model ann completed with best score: 0.7967
🏃 View run ann at: http://127.0.0.1:5000/#/experiments/448265258201631582/runs/d303dae5e83d44e3a535aed85579aa0b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/448265258201631582
Currently assessing random_forest


Successfully registered model 'random_forest'.
2025/03/25 23:11:09 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: random_forest, version 1
Created version '1' of model 'random_forest'.


✅ Model random_forest completed with best score: 0.8088
🏃 View run random_forest at: http://127.0.0.1:5000/#/experiments/448265258201631582/runs/22671ac8b3554d58ab0deb71e311e27f
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/448265258201631582
Currently assessing decision_tree


Successfully registered model 'decision_tree'.
2025/03/25 23:11:24 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: decision_tree, version 1
Created version '1' of model 'decision_tree'.


✅ Model decision_tree completed with best score: 0.7446
🏃 View run decision_tree at: http://127.0.0.1:5000/#/experiments/448265258201631582/runs/246bf348b3c94665aca3983f24c8940b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/448265258201631582
Currently assessing svm
[LibSVM]

Successfully registered model 'svm'.
2025/03/25 23:12:49 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: svm, version 1
Created version '1' of model 'svm'.


✅ Model svm completed with best score: 0.7881
🏃 View run svm at: http://127.0.0.1:5000/#/experiments/448265258201631582/runs/af3a7c9210a34028ad9bf37238f70c90
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/448265258201631582
Currently assessing knn


Successfully registered model 'knn'.
2025/03/25 23:13:08 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: knn, version 1
Created version '1' of model 'knn'.


✅ Model knn completed with best score: 0.7992
🏃 View run knn at: http://127.0.0.1:5000/#/experiments/448265258201631582/runs/292a7ac88ae74ff187e718025ac30549
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/448265258201631582
Currently assessing lr


Successfully registered model 'lr'.
2025/03/25 23:13:21 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: lr, version 1
Created version '1' of model 'lr'.


✅ Model lr completed with best score: 0.6781
🏃 View run lr at: http://127.0.0.1:5000/#/experiments/448265258201631582/runs/d90f101cc6934aa38f333057cbce3f47
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/448265258201631582
🎯 All models have been evaluated and logged in MLflow!
