In [None]:
#Caso: Scoring para Crédito vehicular
#Autor: Armando Ita
#Fecha: 2025-10-15
#Una empresa de financiamiento automotriz quiere mejorar su proceso de evaluación de riesgos.
#Te han dado datos históricos de clientes y se desea construir un modelo de ML que prediga la probabilidad de que un cliente entre en default,
#a partir de sus características personales y crediticias.

#Tiempo: 10 minutos.

In [None]:
# Run these commands in your terminal or in a cell with '!' at the beginning
%pip install pandas
%pip install pycaret

In [1]:
import pandas as pd
from pycaret.classification import *

In [3]:
#1. Cargamos la data:
df = pd.read_csv('ds_credito_vehicular.csv')

In [4]:
df.head()

Unnamed: 0,edad,ingresos_mensuales,estado_civil,historia_credito,nro_creditos_previos,cuota_vs_ingreso,vehiculo_propio,default
0,59,7668,Divorciado,Buena,2,0.64,0,1
1,49,6279,Casado,Buena,2,0.31,1,1
2,35,2722,Soltero,Mala,4,0.41,1,0
3,63,4314,Divorciado,Regular,4,0.57,0,0
4,28,4157,Soltero,Buena,1,0.34,0,0


In [5]:
#Inicializar el experimento:
reg = setup(data = df, target = 'default', session_id = 123, normalize=True, categorical_features=['estado_civil', 'historia_credito'])

Unnamed: 0,Description,Value
0,Session id,123
1,Target,default
2,Target type,Binary
3,Original data shape,"(150, 8)"
4,Transformed data shape,"(150, 12)"
5,Transformed train set shape,"(105, 12)"
6,Transformed test set shape,"(45, 12)"
7,Numeric features,5
8,Categorical features,2
9,Preprocess,True


In [6]:
#3. Comparar modelos automáticamente:
best_model = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dummy,Dummy Classifier,0.8291,0.5,0.0,0.0,0.0,0.0,0.0,0.005
ridge,Ridge Classifier,0.8191,0.5049,0.0,0.0,0.0,-0.0154,-0.0167,0.006
rf,Random Forest Classifier,0.81,0.4587,0.0,0.0,0.0,-0.0292,-0.0316,0.017
lr,Logistic Regression,0.7991,0.5035,0.0,0.0,0.0,-0.0404,-0.0417,0.282
lda,Linear Discriminant Analysis,0.79,0.5049,0.05,0.05,0.05,-0.0153,-0.0177,0.005
lightgbm,Light Gradient Boosting Machine,0.7891,0.5993,0.05,0.1,0.0667,0.0013,0.0028,0.058
knn,K Neighbors Classifier,0.7809,0.3132,0.0,0.0,0.0,-0.0637,-0.0659,0.115
gbc,Gradient Boosting Classifier,0.7527,0.55,0.05,0.05,0.05,-0.0639,-0.0687,0.009
ada,Ada Boost Classifier,0.7473,0.5396,0.1,0.05,0.0667,-0.0384,-0.0365,0.008
et,Extra Trees Classifier,0.7418,0.3851,0.0,0.0,0.0,-0.1148,-0.1219,0.015


In [8]:
#4. Crear el modelo:
dummy = create_model('dummy')

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8182,0.5,0.0,0.0,0.0,0.0,0.0
1,0.8182,0.5,0.0,0.0,0.0,0.0,0.0
2,0.8182,0.5,0.0,0.0,0.0,0.0,0.0
3,0.8182,0.5,0.0,0.0,0.0,0.0,0.0
4,0.8182,0.5,0.0,0.0,0.0,0.0,0.0
5,0.9,0.5,0.0,0.0,0.0,0.0,0.0
6,0.9,0.5,0.0,0.0,0.0,0.0,0.0
7,0.8,0.5,0.0,0.0,0.0,0.0,0.0
8,0.8,0.5,0.0,0.0,0.0,0.0,0.0
9,0.8,0.5,0.0,0.0,0.0,0.0,0.0


In [11]:
#5. Ajustar el modelo:
tuned_dummy = tune_model(dummy)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8182,0.5,0.0,0.0,0.0,0.0,0.0
1,0.8182,0.5,0.0,0.0,0.0,0.0,0.0
2,0.8182,0.5,0.0,0.0,0.0,0.0,0.0
3,0.8182,0.5,0.0,0.0,0.0,0.0,0.0
4,0.8182,0.5,0.0,0.0,0.0,0.0,0.0
5,0.9,0.5,0.0,0.0,0.0,0.0,0.0
6,0.9,0.5,0.0,0.0,0.0,0.0,0.0
7,0.8,0.5,0.0,0.0,0.0,0.0,0.0
8,0.8,0.5,0.0,0.0,0.0,0.0,0.0
9,0.8,0.5,0.0,0.0,0.0,0.0,0.0


Fitting 10 folds for each of 4 candidates, totalling 40 fits
Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


In [12]:
#6. Evaluar visualmente el modelo:
evaluate_model(tuned_dummy)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

In [13]:
#7. Predicción:
preds = predict_model(tuned_dummy)
print(preds.head())

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Dummy Classifier,0.8444,0.5,0.0,0.0,0.0,0.0,0.0


     edad  ingresos_mensuales estado_civil historia_credito  \
57     60                4457       Casado            Buena   
98     55                2147       Casado            Buena   
54     34                5659      Soltero          Regular   
92     64                2930       Casado            Buena   
137    33                4643       Casado            Buena   

     nro_creditos_previos  cuota_vs_ingreso  vehiculo_propio  default  \
57                      3              0.32                0        0   
98                      2              0.25                1        0   
54                      3              0.32                0        0   
92                      2              0.31                1        0   
137                     0              0.69                0        1   

     prediction_label  prediction_score  
57                  0            0.8286  
98                  0            0.8286  
54                  0            0.8286  
92            

In [None]:
#1. En promedio, el modelo se equivoca en un 5.76% del precio real.
#2. El modelo explica el 97.43% de la varianza de los precios.
#3. Error logaritmico medio: 0.0807 es bajo; buen performance en escalas grandes.

#Conclusión: El modelo tiene muy buen ajuste global. Es explicativo porque tiene un R2 ajustado alto y el error medio es bajo. Es preciso porque tiene un MAPE pequeño.

In [16]:
#8. Guardar el modelo:
save_model(tuned_dummy, 'modelo_credito_vehicular_final')

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=None),
          steps=[('numerical_imputer',
                  TransformerWrapper(exclude=None,
                                     include=['edad', 'ingresos_mensuales',
                                              'nro_creditos_previos',
                                              'cuota_vs_ingreso',
                                              'vehiculo_propio'],
                                     transformer=SimpleImputer(add_indicator=False,
                                                               copy=True,
                                                               fill_value=None,
                                                               keep_empty_features=False,
                                                               missing_values=nan,
                                                               strategy='mean'))),
                 ('categorical_impu...
                                                           