# Desenvolvimento do modelo de classificação

In [None]:
import pandas as pd
from dagshub.data_engine import datasources
import dagshub
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import mlflow
from mlflow.models.signature import infer_signature
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, f1_score
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

# Obtendo dados do dataset

In [20]:
ds = datasources.get('jef-santos/9DTSR_MLOPS_TrabalhoFinal', 'processed')


In [21]:
ds.all().dataframe

Output()

Unnamed: 0,path,datapoint_id,dagshub_download_url,media type,size
0,credit_score_test.csv,103549194,https://dagshub.com/api/v1/repos/jef-santos/9D...,text/plain,7748488
1,credit_score_train.csv,103549195,https://dagshub.com/api/v1/repos/jef-santos/9D...,text/plain,15690339
2,.gitkeep,103549193,https://dagshub.com/api/v1/repos/jef-santos/9D...,,0


In [22]:
res = ds.head()
for dp in res:
    print(dp.path_in_repo)
    print(dp.download_url)
    dataset_url = dp.download_url

Output()

data/processed/credit_score_test.csv
https://dagshub.com/api/v1/repos/jef-santos/9DTSR_MLOPS_TrabalhoFinal/raw/main/data/processed/credit_score_test.csv
data/processed/credit_score_train.csv
https://dagshub.com/api/v1/repos/jef-santos/9DTSR_MLOPS_TrabalhoFinal/raw/main/data/processed/credit_score_train.csv
data/processed/.gitkeep
https://dagshub.com/api/v1/repos/jef-santos/9DTSR_MLOPS_TrabalhoFinal/raw/main/data/processed/.gitkeep


In [23]:
df = pd.read_csv('https://dagshub.com/api/v1/repos/jef-santos/9DTSR_MLOPS_TrabalhoFinal/raw/main/data/processed/credit_score_train.csv')
df.head()

Unnamed: 0,Age,Annual_Income,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Num_of_Loan,Num_of_Delayed_Payment,Outstanding_Debt,Credit_Utilization_Ratio,Amount_invested_monthly,...,Occupation_musician,Occupation_scientist,Occupation_sem_informacao,Occupation_teacher,Occupation_writer,Credit_Mix_bad,Credit_Mix_good,Credit_Mix_sem_informacao,Credit_Mix_standard,target_numeric
0,23,-0.105759,3,4,3,4,7,-0.52076,-1.067845,80.415295,...,0,1,0,0,0,0,0,1,0,0
1,23,-0.105759,3,4,3,4,0,-0.52076,-0.067121,118.280222,...,0,1,0,0,0,0,1,0,0,0
2,23,-0.105759,3,4,3,4,4,-0.52076,-0.177912,199.458074,...,0,1,0,0,0,0,1,0,0,0
3,23,-0.105759,3,4,3,4,0,-0.52076,-1.463512,41.420153,...,0,1,0,0,0,0,1,0,0,0
4,23,-0.105759,3,4,3,4,4,-0.52076,-0.981955,62.430172,...,0,1,0,0,0,0,1,0,0,0


# Desenvolvimento dos modelos

In [24]:
dagshub.init(repo_owner='jef-santos', repo_name='9DTSR_MLOPS_TrabalhoFinal', mlflow=True)


In [25]:
mlflow.autolog()

2025/07/27 15:42:23 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2025/07/27 15:42:23 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2025/07/27 15:42:23 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.


In [26]:
features = list(df.columns)
features.remove("target_numeric")
features

['Age',
 'Annual_Income',
 'Num_Bank_Accounts',
 'Num_Credit_Card',
 'Interest_Rate',
 'Num_of_Loan',
 'Num_of_Delayed_Payment',
 'Outstanding_Debt',
 'Credit_Utilization_Ratio',
 'Amount_invested_monthly',
 'Monthly_Balance',
 'TypeLoan_autoloan',
 'TypeLoan_credit-builderloan',
 'TypeLoan_debtconsolidationloan',
 'TypeLoan_homeequityloan',
 'TypeLoan_mortgageloan',
 'TypeLoan_paydayloan',
 'TypeLoan_personalloan',
 'TypeLoan_studentloan',
 'Occupation_accountant',
 'Occupation_architect',
 'Occupation_developer',
 'Occupation_doctor',
 'Occupation_engineer',
 'Occupation_entrepreneur',
 'Occupation_journalist',
 'Occupation_lawyer',
 'Occupation_manager',
 'Occupation_mechanic',
 'Occupation_media_manager',
 'Occupation_musician',
 'Occupation_scientist',
 'Occupation_sem_informacao',
 'Occupation_teacher',
 'Occupation_writer',
 'Credit_Mix_bad',
 'Credit_Mix_good',
 'Credit_Mix_sem_informacao',
 'Credit_Mix_standard']

In [27]:
len(features)

39

In [28]:
X = df[features]
X

Unnamed: 0,Age,Annual_Income,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Num_of_Loan,Num_of_Delayed_Payment,Outstanding_Debt,Credit_Utilization_Ratio,Amount_invested_monthly,...,Occupation_media_manager,Occupation_musician,Occupation_scientist,Occupation_sem_informacao,Occupation_teacher,Occupation_writer,Credit_Mix_bad,Credit_Mix_good,Credit_Mix_sem_informacao,Credit_Mix_standard
0,23,-0.105759,3,4,3,4,7,-0.520760,-1.067845,80.415295,...,0,0,1,0,0,0,0,0,1,0
1,23,-0.105759,3,4,3,4,0,-0.520760,-0.067121,118.280222,...,0,0,1,0,0,0,0,1,0,0
2,23,-0.105759,3,4,3,4,4,-0.520760,-0.177912,199.458074,...,0,0,1,0,0,0,0,1,0,0
3,23,-0.105759,3,4,3,4,0,-0.520760,-1.463512,41.420153,...,0,0,1,0,0,0,0,1,0,0
4,23,-0.105759,3,4,3,4,4,-0.520760,-0.981955,62.430172,...,0,0,1,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92367,25,-0.091167,4,6,7,2,7,-0.785916,0.463999,60.971333,...,0,0,0,0,0,0,0,0,1,0
92368,25,-0.091167,4,6,7,2,7,-0.785916,1.617052,54.185950,...,0,0,0,0,0,0,0,0,1,0
92369,25,-0.091167,4,6,5729,2,6,-0.785916,1.751833,24.028477,...,0,0,0,0,0,0,0,1,0,0
92370,25,-0.091167,4,6,7,2,0,-0.785916,0.263679,251.672582,...,0,0,0,0,0,0,0,1,0,0


In [29]:
y = df["target_numeric"]
y

0        0
1        0
2        0
3        0
4        0
        ..
92367    2
92368    2
92369    2
92370    1
92371    2
Name: target_numeric, Length: 92372, dtype: int64

In [30]:
X_train, X_test, y_train, y_test = train_test_split(X.values, y.values, test_size=0.3, random_state=42)

## Criando função de avaliação

In [None]:
def evaluate_and_log_classification_model(kind, model_name, model, X_test, y_test):
    predictions = model.predict(X_test)

    # Algumas métricas comuns de classificação
    accuracy = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions, average='weighted')  # 'weighted' para múltiplas classes
    recall = recall_score(y_test, predictions, average='weighted')
    f1 = f1_score(y_test, predictions, average='weighted')

    # Log das métricas no MLflow
    mlflow.log_metric("Accuracy", accuracy)
    mlflow.log_metric("Precision", precision)
    mlflow.log_metric("Recall", recall)
    mlflow.log_metric("F1_Score", f1)

    # Inferência da assinatura dos dados
    signature = infer_signature(X_test, predictions)

    # Log do modelo
    if kind == "catboost":
        mlflow.sklearn.log_model(model, "model", signature=signature, input_example=X_test[:5])
    elif kind == "xgboost":
        mlflow.xgboost.log_model(model, "model", signature=signature, input_example=X_test[:5])
    elif kind == "lightgbm":
        mlflow.lightgbm.log_model(model, "model", signature=signature, input_example=X_test[:5])
    else:
        mlflow.sklearn.log_model(model, "model", signature=signature, input_example=X_test[:5])

    # Impressão dos resultados
    print(f"Resultados para {model_name}:")
    print(f" Accuracy: {accuracy:.4f}")
    print(f" Precision: {precision:.4f}")
    print(f" Recall: {recall:.4f}")
    print(f" F1 Score: {f1:.4f}")


# Rodando Experimentos

In [None]:
with mlflow.start_run(run_name="RandomForest_Classifier"):
    param_grid = {
        'n_estimators': [100, 200],
        'max_depth': [5, 10, None],
    }
    rf = RandomForestClassifier(random_state=42)
    grid_search = GridSearchCV(rf, param_grid, scoring=make_scorer(f1_score, average='weighted'), cv=5)
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    mlflow.log_param("best_n_estimators", best_model.n_estimators)
    mlflow.log_param("best_max_depth", best_model.max_depth)
    
    evaluate_and_log_classification_model("sklearn", "Random Forest", best_model, X_test, y_test)


2025/07/27 15:47:52 INFO mlflow.sklearn.utils: Logging the 5 best runs, one run will be omitted.


🏃 View run nosy-bird-436 at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/5907e499ae5748b1b76f2a4b7086ac62
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0
🏃 View run sincere-roo-406 at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/90ba5a1789e6449e82eced4dbbcc0ec8
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0
🏃 View run kindly-squid-527 at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/c1801859a0d04d6fb4373490645f4ff6
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0
🏃 View run stately-lynx-880 at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/318cacf502e64043bd93283f93132305
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiment



🏃 View run rebellious-swan-916 at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/e79985a2fd5b4c37bdb0a6e724d60075
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0
🏃 View run handsome-conch-409 at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/ea68242cdc374b74a86b3281ff83b6e3
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0
🏃 View run loud-slug-933 at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/7c73ef1f19a74f3ea152a3ce748e9d33
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0




🏃 View run resilient-toad-630 at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/7fea5071b22f4efdb0362842c1dcfdc5
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0
🏃 View run angry-dove-485 at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/bb7e432760cc48adbb116cacfa4c702b
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0
🏃 View run selective-crane-960 at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/9bda5b9ad7f541feb6029aeed4e946c5
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0
🏃 View run smiling-skink-359 at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/e2b0a6280cbe4e3285f564f9ab87deda
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/ex



🏃 View run stylish-ant-298 at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/962bc30bb5ca4bdcaca3069f7f468396
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0
🏃 View run treasured-wren-899 at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/b8639e1dcd6343db8b8aaa763319ef5f
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0
🏃 View run defiant-foal-760 at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/39656b6bdb20418fa87f50dabac901ea
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0
🏃 View run melodic-ray-716 at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/23e4a66f03bb49309d1e873e10652469
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experi

In [None]:
with mlflow.start_run(run_name="XGBoost_Classifier"):
    param_grid = {
        'n_estimators': [100, 200],
        'max_depth': [3, 6],
        'learning_rate': [0.01, 0.1]
    }
    xgb = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
    grid_search = GridSearchCV(xgb, param_grid, scoring=make_scorer(f1_score, average='weighted'), cv=5)
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    mlflow.log_param("best_n_estimators", best_model.n_estimators)
    mlflow.log_param("best_max_depth", best_model.max_depth)
    mlflow.log_param("best_learning_rate", best_model.learning_rate)

    evaluate_and_log_classification_model("xgboost", "XGBoost", best_model, X_test, y_test)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


Resultados para XGBoost:
 Accuracy: 0.7212
 Precision: 0.7216
 Recall: 0.7212
 F1 Score: 0.7208
🏃 View run XGBoost_Classifier at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/afcc74ea99c04495aec17da87878b642
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0


In [None]:
with mlflow.start_run(run_name="LightGBM_Classifier"):
    param_grid = {
        'n_estimators': [100, 200],
        'num_leaves': [31, 50],
        'learning_rate': [0.01, 0.1]
    }
    lgbm = LGBMClassifier(random_state=42)
    grid_search = GridSearchCV(lgbm, param_grid, scoring=make_scorer(f1_score, average='weighted'), cv=5)
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    mlflow.log_param("best_n_estimators", best_model.n_estimators)
    mlflow.log_param("best_num_leaves", best_model.num_leaves)
    mlflow.log_param("best_learning_rate", best_model.learning_rate)

    evaluate_and_log_classification_model("lightgbm", "LightGBM", best_model, X_test, y_test)




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2476
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631268
[LightGBM] [Info] Start training from score -1.235751




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2479
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631268
[LightGBM] [Info] Start training from score -1.235751




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2477
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2470
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2475
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2476
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631268
[LightGBM] [Info] Start training from score -1.235751




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2479
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631268
[LightGBM] [Info] Start training from score -1.235751




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2477
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2470
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2475
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2476
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631268
[LightGBM] [Info] Start training from score -1.235751




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2479
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631268
[LightGBM] [Info] Start training from score -1.235751




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2477
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2470
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2475
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2476
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631268
[LightGBM] [Info] Start training from score -1.235751




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2479
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631268
[LightGBM] [Info] Start training from score -1.235751




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2477
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2470
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2475
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2476
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631268
[LightGBM] [Info] Start training from score -1.235751




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2479
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631268
[LightGBM] [Info] Start training from score -1.235751




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2477
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2470
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2475
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2476
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631268
[LightGBM] [Info] Start training from score -1.235751




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2479
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631268
[LightGBM] [Info] Start training from score -1.235751




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2477
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2470
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2475
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2476
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631268
[LightGBM] [Info] Start training from score -1.235751




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2479
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631268
[LightGBM] [Info] Start training from score -1.235751




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2477
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2470
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2475
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2476
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631268
[LightGBM] [Info] Start training from score -1.235751




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2479
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631268
[LightGBM] [Info] Start training from score -1.235751




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2477
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2470
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2475
[LightGBM] [Info] Number of data points in the train set: 51728, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631304
[LightGBM] [Info] Start training from score -1.235685




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2635
[LightGBM] [Info] Number of data points in the train set: 64660, number of used features: 39
[LightGBM] [Info] Start training from score -1.728972
[LightGBM] [Info] Start training from score -0.631290
[LightGBM] [Info] Start training from score -1.235712


2025/07/27 16:02:20 INFO mlflow.sklearn.utils: Logging the 5 best runs, 3 runs will be omitted.


Resultados para LightGBM:
 Accuracy: 0.7473
 Precision: 0.7473
 Recall: 0.7473
 F1 Score: 0.7471
🏃 View run LightGBM_Classifier at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/abfc4e47c1204f62b024ee63c5c428df
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0


In [None]:
fwith mlflow.start_run(run_name="CatBoost_Classifier"):
    param_grid = {
        'iterations': [100, 200],
        'depth': [4, 6],
        'learning_rate': [0.01, 0.1]
    }
    catboost = CatBoostClassifier(verbose=0, random_state=42)
    grid_search = GridSearchCV(catboost, param_grid, scoring=make_scorer(f1_score, average='weighted'), cv=5)
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    mlflow.log_param("best_iterations", best_model.get_param("iterations"))
    mlflow.log_param("best_depth", best_model.get_param("depth"))
    mlflow.log_param("best_learning_rate", best_model.get_param("learning_rate"))

    evaluate_and_log_classification_model("catboost", "CatBoost", best_model, X_test, y_test)


2025/07/27 16:04:47 INFO mlflow.sklearn.utils: Logging the 5 best runs, 3 runs will be omitted.


Resultados para CatBoost:
 Accuracy: 0.6969
 Precision: 0.6997
 Recall: 0.6969
 F1 Score: 0.6971
🏃 View run CatBoost_Classifier at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0/runs/51d4711fff134cef94fd0f763678b57b
🧪 View experiment at: https://dagshub.com/jef-santos/9DTSR_MLOPS_TrabalhoFinal.mlflow/#/experiments/0


# Registrando modelo com melhor performance

In [36]:
run_id = "81f46a494f50418faf491c2382011b6f"
mlflow.register_model(model_uri=f"runs:/{run_id}/model", name="score-credit-model")

Successfully registered model 'score-credit-model'.
2025/07/27 18:53:08 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: score-credit-model, version 1
Created version '1' of model 'score-credit-model'.


<ModelVersion: aliases=[], creation_timestamp=1753653188430, current_stage='None', description='', last_updated_timestamp=1753653188430, name='score-credit-model', run_id='81f46a494f50418faf491c2382011b6f', run_link='', source='mlflow-artifacts:/46e138aa57b044e386a21f8828be8e20/81f46a494f50418faf491c2382011b6f/artifacts/model', status='READY', status_message=None, tags={}, user_id='', version='1'>