# Desenvolvimento do modelo de predição

In [1]:
import pandas as pd
from dagshub.data_engine import datasources
import mlflow
import dagshub
from sklearn.model_selection import train_test_split
import mlflow.sklearn
import mlflow.catboost
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor
import lightgbm as lgb
from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
import mlflow.models.signature
from mlflow.models import infer_signature
from catboost import CatBoostRegressor

# Obtendo o dataset

In [2]:
# Handle to the 'processed' datasource of the DagsHub repository.
ds = datasources.get(
    'fabioebner/quantum-finance-mlops',
    'processed',
)


In [3]:
# Download the processed dataset.
# The datasource preview returns datapoints that each expose a download URL;
# the CSV behind the last datapoint returned is the one that gets loaded.
res = ds.head()
datapoints = list(res)
if not datapoints:
    # Fail with an explicit message instead of a NameError on `dataset_url`.
    raise RuntimeError(
        "Datasource 'processed' returned no datapoints; cannot locate the dataset CSV."
    )
dataset_url = datapoints[-1].download_url

df = pd.read_csv(dataset_url)


Output()

Output()

In [4]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 73156 entries, 0 to 73155
Data columns (total 14 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Age                       73156 non-null  float64
 1   Occupation                73156 non-null  float64
 2   Annual_Income             73156 non-null  float64
 3   Monthly_Inhand_Salary     73156 non-null  float64
 4   Num_Bank_Accounts         73156 non-null  float64
 5   Num_Credit_Card           73156 non-null  float64
 6   Num_of_Loan               73156 non-null  float64
 7   Num_of_Delayed_Payment    73156 non-null  float64
 8   Num_Credit_Inquiries      73156 non-null  float64
 9   Credit_Utilization_Ratio  73156 non-null  float64
 10  Total_EMI_per_month       73156 non-null  float64
 11  Amount_invested_monthly   73156 non-null  float64
 12  Monthly_Balance           73156 non-null  float64
 13  Credit_Score              73156 non-null  int64  
dtypes: flo

In [5]:
# Preview the first rows of the dataset (displayed as the cell output).
df.head()

Unnamed: 0,Age,Occupation,Annual_Income,Monthly_Inhand_Salary,Num_Bank_Accounts,Num_Credit_Card,Num_of_Loan,Num_of_Delayed_Payment,Num_Credit_Inquiries,Credit_Utilization_Ratio,Total_EMI_per_month,Amount_invested_monthly,Monthly_Balance,Credit_Score
0,0.001036,0.6,0.000501,0.102087,0.001669,0.002668,0.002676,0.001592,0.00154,0.203984,0.000602,0.008042,9.3708e-25,2
1,0.001036,0.6,0.000501,0.102087,0.001669,0.002668,0.002676,0.001819,0.00154,0.057005,0.000602,0.017834,7.3332e-25,2
2,0.001036,0.6,0.000501,0.102087,0.001669,0.002668,0.002676,0.001365,0.00154,0.104739,0.000602,0.002479,1.0739699999999999e-24,1
3,0.001612,0.0,0.001151,0.183501,0.001112,0.002668,0.000669,0.00091,0.00077,0.12294,0.000229,0.010429,1.4116799999999999e-24,1
4,0.001612,0.533333,0.001151,0.183501,0.001112,0.002668,0.000669,0.000227,0.00077,0.606799,0.000229,0.004039,1.45338e-24,2


In [6]:
# Initialise the DagsHub integration for this repository with MLflow enabled.
dagshub.init(
    repo_owner='fabioebner',
    repo_name='quantum-finance-mlops',
    mlflow=True,
)

# Turn on MLflow autologging for the supported libraries that are installed.
mlflow.autolog()

2025/08/01 08:24:09 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2025/08/01 08:24:10 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2025/08/01 08:24:10 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.


In [7]:
# Separate the predictors from the target column before splitting.
features = df.drop(columns=['Credit_Score'])
target = df['Credit_Score']

# 70/30 hold-out split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=42,
)

In [8]:
# Helper that evaluates a fitted model and tracks it with MLflow

def evaluate_and_log_model(kind, model_name, model, X_test, y_test):
    """Score a fitted model on the hold-out data and log it with MLflow.

    Args:
        kind: Flavor selector. "catboost", "xgboost" and "lightgbm" pick the
            matching ``mlflow.<flavor>.log_model``; any other value falls back
            to ``mlflow.sklearn.log_model``.
        model_name: Human-readable label of the model. Not used by the body;
            kept so existing call sites keep working.
        model: Fitted estimator exposing ``predict``.
        X_test: Hold-out features. Its first five rows are stored as the
            model's input example.
        y_test: Hold-out targets the predictions are compared against.

    Returns:
        None. The metrics "MSE", "MAE", "R2" and "MAPE" and the model
        artifact (artifact path "model") are logged through the MLflow API.
    """
    predictions = model.predict(X_test)

    # NOTE(review): MAPE is unstable when y_test contains zeros -- confirm the
    # encoding of the target before relying on this metric.
    metrics = {
        "MSE": mean_squared_error(y_test, predictions),
        "MAE": mean_absolute_error(y_test, predictions),
        "R2": r2_score(y_test, predictions),
        "MAPE": mean_absolute_percentage_error(y_test, predictions),
    }
    for metric_name, metric_value in metrics.items():
        mlflow.log_metric(metric_name, metric_value)

    # Infer the model signature automatically from inputs and outputs.
    signature = infer_signature(X_test, predictions)

    # Pick the logging function of the native flavor; CatBoost models go
    # through mlflow.catboost rather than the generic sklearn flavor.
    if kind == "catboost":
        log_model = mlflow.catboost.log_model
    elif kind == "xgboost":
        log_model = mlflow.xgboost.log_model
    elif kind == "lightgbm":
        log_model = mlflow.lightgbm.log_model
    else:
        log_model = mlflow.sklearn.log_model

    log_model(model, "model", signature=signature, input_example=X_test[:5])


### Experimento com Ridge Regression

In [9]:
with mlflow.start_run(run_name="Ridge Regression"):
    # MAPE is an error, so the scorer is flagged as "lower is better".
    mape_scorer = make_scorer(mean_absolute_percentage_error, greater_is_better=False)

    # Regularisation strengths explored by the grid search.
    param_grid = {'alpha': [0.1, 1.0, 10.0, 100.0]}

    ridge = Ridge()
    grid_search = GridSearchCV(
        estimator=ridge,
        param_grid=param_grid,
        scoring=mape_scorer,
        cv=5,
    )
    grid_search.fit(X_train, y_train)

    # Estimator configured with the best parameter combination found.
    best_model = grid_search.best_estimator_
    mlflow.log_param("Best_alpha", best_model.alpha)

    evaluate_and_log_model("sklearn", "Ridge Regression", best_model, X_test, y_test)

2025/08/01 08:24:30 INFO mlflow.sklearn.utils: Logging the 5 best runs, no runs will be omitted.


🏃 View run flawless-hawk-49 at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/4397b9d1843446b8ad92171bf121b301
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0
🏃 View run suave-cow-689 at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/309700858dc64f69b45bc512f7acec84
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0
🏃 View run rebellious-auk-736 at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/c55b70cb61ac41ebbed28041b113642c
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0
🏃 View run caring-stork-618 at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/a0ad85e8d5bd4de39519e2ebf43c07ea
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0
🏃 View run Ridge Regressi

🏃 View run hilarious-bear-587 at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/b50888807fc7428c90afc4ecc91fbe71
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0
🏃 View run shivering-sheep-382 at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/edc00efaa3cc4ac9a67c323d6a586925
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0
🏃 View run fun-donkey-678 at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/24d396f96ac049dfbf48d6faba062e22
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0
🏃 View run bold-snake-900 at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/939aaa5b3254463e92a7a137af266423
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0
🏃 View run honorable-ba



🏃 View run amusing-midge-521 at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/899ac9d27b5644e59bef5fcfd21d387e
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0
🏃 View run agreeable-carp-801 at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/affb86040d0246fb9d28625a3275808f
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0
🏃 View run salty-doe-939 at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/1457a99991da47bc82ea405bb06f247a
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0
🏃 View run nosy-deer-585 at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/eca7e0e40cd642d3b8995183bead0113
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0


### Decision Tree

In [10]:
with mlflow.start_run(run_name="Decision Tree Regression"):
    # MAPE is an error, so the scorer is flagged as "lower is better".
    mape_scorer = make_scorer(mean_absolute_percentage_error, greater_is_better=False)

    # Tree depth limits and minimum split sizes explored by the grid search.
    param_grid = {
        'max_depth': [3, 5, 10, None],
        'min_samples_split': [2, 5, 10],
    }

    tree = DecisionTreeRegressor(random_state=42)
    grid_search = GridSearchCV(
        estimator=tree,
        param_grid=param_grid,
        scoring=mape_scorer,
        cv=5,
    )
    grid_search.fit(X_train, y_train)

    # Estimator configured with the best parameter combination found.
    best_model = grid_search.best_estimator_
    for param_key, param_value in (
        ("Best_max_depth", best_model.max_depth),
        ("Best_min_samples_split", best_model.min_samples_split),
    ):
        mlflow.log_param(param_key, param_value)

    evaluate_and_log_model("sklearn", "Decision Tree Regression", best_model, X_test, y_test)

2025/08/01 08:25:32 INFO mlflow.sklearn.utils: Logging the 5 best runs, 7 runs will be omitted.


🏃 View run spiffy-mink-571 at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/4cc9cdef771f46498c402701c0de123f
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0
🏃 View run Decision Tree Regression at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/8df2245856664dafbc7da59bfe52746e
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0


🏃 View run languid-slug-706 at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/cb9b7c6df43a4e7db0c0963bbd4d9548
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0




🏃 View run valuable-gnat-667 at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/3985dc70e3ed4b80a6c8d8bc04e7de8a
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0


### MLP Regression

In [12]:
# MLPRegressor comes from the notebook's import cell; it is not re-imported here.
with mlflow.start_run(run_name="MLP Regression"):
    # Layer layouts, activations and L2 penalties explored by the grid search.
    param_grid = {
        'hidden_layer_sizes': [(50,), (100,), (50, 50)],
        'activation': ['relu', 'tanh'],
        'alpha': [0.0001, 0.001, 0.01, 0.1]
    }

    # Fixed seed so weight initialisation is reproducible across runs.
    mlp = MLPRegressor(max_iter=500, random_state=42)
    grid_search = GridSearchCV(mlp, param_grid, scoring=make_scorer(mean_absolute_percentage_error, greater_is_better=False), cv=5)
    grid_search.fit(X_train, y_train)

    # Estimator configured with the best parameter combination found.
    best_model = grid_search.best_estimator_

    mlflow.log_param("Best_hidden_layer_sizes", best_model.hidden_layer_sizes)
    mlflow.log_param("Best_activation", best_model.activation)
    mlflow.log_param("Best_alpha", best_model.alpha)

    evaluate_and_log_model("sklearn", "MLP Regression", best_model, X_test, y_test)

2025/08/01 08:44:06 INFO mlflow.sklearn.utils: Logging the 5 best runs, 19 runs will be omitted.


🏃 View run MLP Regression at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/1f244e92069943acaca8a036c74ff5b7
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0


### XGBoost Regressor

In [11]:
with mlflow.start_run(run_name="XGBoost Regressor"):
    # MAPE is an error, so the scorer is flagged as "lower is better".
    mape_scorer = make_scorer(mean_absolute_percentage_error, greater_is_better=False)

    # Ensemble size, tree depth and learning rate explored by the grid search.
    param_grid = {
        'n_estimators': [100, 200, 300],
        'max_depth': [3, 5, 7, 9],
        'learning_rate': [0.01, 0.1, 0.2, 0.3],
    }

    tree = XGBRegressor(random_state=42, verbosity=0)
    grid_search = GridSearchCV(
        estimator=tree,
        param_grid=param_grid,
        scoring=mape_scorer,
        cv=5,
    )
    grid_search.fit(X_train, y_train)

    # Estimator configured with the best parameter combination found.
    best_model = grid_search.best_estimator_
    for param_key, param_value in (
        ("Best_n_estimators", best_model.n_estimators),
        ("Best_max_depth", best_model.max_depth),
        ("Best_learning_rate", best_model.learning_rate),
    ):
        mlflow.log_param(param_key, param_value)

    evaluate_and_log_model("xgboost", "XGBoost Regressor", best_model, X_test, y_test)

2025/08/01 08:30:01 INFO mlflow.sklearn.utils: Logging the 5 best runs, 43 runs will be omitted.
  self.get_booster().save_model(fname)


🏃 View run XGBoost Regressor at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0/runs/c58a1f8a1afe4cb09eadb358edd11f51
🧪 View experiment at: https://dagshub.com/fabioebner/quantum-finance-mlops.mlflow/#/experiments/0


### Registry do Modelo

In [13]:
# NOTE(review): the run id is hard-coded; it presumably identifies the
# "XGBoost Regressor" run of a previous execution -- confirm before re-running,
# since a fresh execution of the notebook produces new run ids.
run_id = "c58a1f8a1afe4cb09eadb358edd11f51"
model_uri = f"runs:/{run_id}/model"

# Register the run's "model" artifact; the returned ModelVersion is the cell output.
mlflow.register_model(model_uri=model_uri, name="CreditScoreModel")

Successfully registered model 'CreditScoreModel'.
2025/08/01 08:45:08 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: CreditScoreModel, version 1
Created version '1' of model 'CreditScoreModel'.


<ModelVersion: aliases=[], creation_timestamp=1754048708145, current_stage='None', description='', last_updated_timestamp=1754048708145, name='CreditScoreModel', run_id='c58a1f8a1afe4cb09eadb358edd11f51', run_link='', source='mlflow-artifacts:/f9b061f0135d46b3a8b4b22bcf840fe9/c58a1f8a1afe4cb09eadb358edd11f51/artifacts/model', status='READY', status_message=None, tags={}, user_id='', version='1'>