# Regresión Logística Simple

In [46]:
from src.trainning          import create_variables
from src.data_loader        import create_df
from src.evaluation         import show_confusion_matrix, show_metricas, get_coeficientes, show_auc_roc, show_especificidad, show_coeficientes
from src.trainning          import final_pipeline
from src.data_loader        import serialize_models
from src.parser             import YamlParser
from pathlib                import Path


# Read the project configuration from `config.yaml`, resolved against the
# current working directory (so the kernel must be started from the folder
# that contains that file).
YAML_FILE = Path.cwd().joinpath("config.yaml")
yaml_parser = YamlParser()
config = yaml_parser.load_yaml(YAML_FILE)

# Modelling frame shared by every experiment in this notebook.
df_model_rl = create_df('raw_data')

# Baseline pipeline: logistic regression with the model-type setting taken
# from the configuration, with no resampling or feature-selection flags passed.
_lr_model_type = config['params']['model_type']['logistic_regression']
model_pipeline = final_pipeline(model_type=_lr_model_type)

In [47]:
# `create_variables` yields four objects, unpacked here as the train/test
# features and targets that every later cell reuses.
_split = create_variables(df_model_rl)
X_train, X_test, y_train, y_test = _split

# Fit the baseline pipeline on the training portion only.
model_pipeline.fit(X_train, y_train)

# Persist the fitted pipeline under the name 'rl_01_model'; the helper's
# return value is intentionally not shown in this cell.
serialize_models(model_pipeline, 'rl_01_model')

# Test-set predictions consumed by the metric and plotting cells that follow.
pred = model_pipeline.predict(X_test)

In [48]:
# Metrics for the baseline model on the test set. `show_metricas` returns the
# printable report together with the accuracy value.
metricas, accuracy_final = show_metricas(y_test, pred)
especificidad = show_especificidad(y_test, pred)

# One print call with a newline separator emits exactly the same text as
# printing each item on its own.
print(
    f"\nAccuracy del Modelo Final Optimizado: {accuracy_final:.4f}",
    f"\nEspecificidad del Modelo Final Optimizado: {especificidad:.4f}",
    "\nReporte de Clasificación del Modelo Final:",
    metricas,
    sep="\n",
)


Accuracy del Modelo Final Optimizado: 0.7472

Especificidad del Modelo Final Optimizado: 0.7613

Reporte de Clasificación del Modelo Final:
              precision    recall  f1-score   support

           0       0.92      0.76      0.83       222
           1       0.35      0.67      0.46        43

    accuracy                           0.75       265
   macro avg       0.64      0.72      0.65       265
weighted avg       0.83      0.75      0.77       265



In [49]:
# Confusion matrix for the baseline predictions (`pred` comes from
# `model_pipeline.predict(X_test)`); the string argument is presumably the
# plot title. Ending the cell with the bare name makes the notebook render
# whatever object the helper returned.
gr = show_confusion_matrix(y_test, pred, 'Regresion Logistica Simple')
gr

In [50]:
# ROC analysis for the baseline pipeline. The helper receives the fitted
# pipeline and the raw test data (not `pred`), and returns two values.
# Only `fig` is displayed; `auc_roc` is kept but not printed in this cell.
fig, auc_roc = show_auc_roc(model_pipeline, X_test, y_test)
fig

In [51]:
# Pull the fitted steps once and read both pieces from the same mapping.
_fitted_steps = model_pipeline.named_steps

# First (only, for a binary target) row of the logistic-regression weights.
coef = _fitted_steps['model'].coef_[0]

# Output column names of the preprocessing step, used to label each weight.
columns_names = _fitted_steps['preprocessing'].get_feature_names_out()

# Pair weights with names and draw them; the trailing bare name renders the
# object returned by the plotting helper.
coeficientes = get_coeficientes(coef, columns_names)
gr = show_coeficientes(coeficientes)
gr

# Regresión Logística con SMOTEENN

In [52]:
# Same logistic-regression configuration as the baseline, now with the
# pipeline's resampling option switched on.
model_smote = final_pipeline(
    model_type=config['params']['model_type']['logistic_regression'],
    use_smote=True,
)

model_smote.fit(X_train, y_train)

# The serialization helper returns a status message, which is echoed here.
_save_message = serialize_models(model_smote, 'rl_02_model_smote')
print(_save_message)

# NOTE: this rebinds `pred`, replacing the baseline predictions used by the
# earlier cells.
pred = model_smote.predict(X_test)

Modelo rl_02_model_smote.pkl guardado con éxito


In [53]:
# Metrics for the resampled model; `pred` now holds its test-set predictions.
metricas, accuracy_final = show_metricas(y_test, pred)
especificidad = show_especificidad(y_test, pred)

# Assemble the report lines first, then emit them in one write. `str()` is
# what `print` would apply to the report object anyway.
_report_lines = [
    f"\nAccuracy del Modelo Final Optimizado: {accuracy_final:.4f}",
    f"\nEspecificidad del Modelo Final Optimizado: {especificidad:.4f}",
    "\nReporte de Clasificación del Modelo Final:",
    str(metricas),
]
print("\n".join(_report_lines))


Accuracy del Modelo Final Optimizado: 0.6302

Especificidad del Modelo Final Optimizado: 0.6126

Reporte de Clasificación del Modelo Final:
              precision    recall  f1-score   support

           0       0.92      0.61      0.74       222
           1       0.26      0.72      0.39        43

    accuracy                           0.63       265
   macro avg       0.59      0.67      0.56       265
weighted avg       0.81      0.63      0.68       265



In [54]:
# Confusion matrix for the resampled model's predictions (`pred` was rebound
# by the cell that fitted `model_smote`). The bare `gr` renders the returned
# object as the cell output.
gr = show_confusion_matrix(y_test, pred, 'Regresion Logistica con SMOTEENN')
gr

In [55]:
# Read both fitted steps from a single lookup of the pipeline's step mapping.
_fitted_steps = model_smote.named_steps

# Logistic-regression weights (first row) and the preprocessing output names
# that label them.
coef = _fitted_steps['model'].coef_[0]
columns_names = _fitted_steps['preprocessing'].get_feature_names_out()

# Combine names and weights, plot them, and show the result as cell output.
coeficientes = get_coeficientes(coef, columns_names)
gr = show_coeficientes(coeficientes)
gr

In [56]:
# ROC analysis for the resampled pipeline, evaluated on the untouched test
# split. This rebinds `fig` and `auc_roc` from the baseline cell; only the
# figure is displayed here.
fig, auc_roc = show_auc_roc(model_smote, X_test, y_test)
fig

# Regresión Logística con SMOTEENN - 15 Variables con mayor coeficiente

In [57]:
# Third experiment: resampling plus the pipeline's importance-based feature
# selection. The options are collected first so the call itself stays short.
_pipeline_options = {
    'model_type': config['params']['model_type']['logistic_regression'],
    'use_smote': True,
    'use_importances': True,
}

# NOTE: this reuses the names `model_smote` and `pred`, so after this cell
# they refer to the feature-selected model rather than the plain SMOTEENN one.
model_smote = final_pipeline(**_pipeline_options)

# The recorded output of this cell shows a warning about `penalty` versus
# `l1_ratio`; it presumably originates from the estimator settings inside
# `final_pipeline` — TODO confirm there.
model_smote.fit(X_train, y_train)
serialize_models(model_smote, 'rl_03_model_smote_15v')

pred = model_smote.predict(X_test)




Inconsistent values: penalty=l1 with l1_ratio=0.0. penalty is deprecated. Please use l1_ratio only.



In [58]:
# Metrics for the feature-selected model on the test set.
metricas, accuracy_final = show_metricas(y_test, pred)
especificidad = show_especificidad(y_test, pred)

# Emit the headline numbers followed by the full report, one item per print.
_report_items = (
    f"\nAccuracy del Modelo Final Optimizado: {accuracy_final:.4f}",
    f"\nEspecificidad del Modelo Final Optimizado: {especificidad:.4f}",
    "\nReporte de Clasificación del Modelo Final:",
    metricas,
)
for _item in _report_items:
    print(_item)


Accuracy del Modelo Final Optimizado: 0.6075

Especificidad del Modelo Final Optimizado: 0.5766

Reporte de Clasificación del Modelo Final:
              precision    recall  f1-score   support

           0       0.93      0.58      0.71       222
           1       0.26      0.77      0.39        43

    accuracy                           0.61       265
   macro avg       0.59      0.67      0.55       265
weighted avg       0.82      0.61      0.66       265



In [59]:
# Confusion matrix for the feature-selected model; `pred` holds the
# predictions from the most recent `model_smote.predict(X_test)` call.
# The bare `gr` renders the returned object as the cell output.
gr = show_confusion_matrix(y_test, pred, 'Regresion Logistica - SMOTEENN - 15 Variables')
gr

In [60]:
# ROC analysis for the feature-selected pipeline (`model_smote` was rebound
# by the cell that enabled `use_importances`). Only the figure is displayed;
# `auc_roc` is stored but not printed.
fig, auc_roc = show_auc_roc(model_smote, X_test, y_test)
fig

In [61]:
# Weights of the final logistic-regression step (first row of `coef_`).
coef = model_smote.named_steps['model'].coef_[0]

# Names of every column produced by the preprocessing step. With feature
# selection enabled the model only sees a subset of these columns.
columns_names = model_smote.named_steps['preprocessing'].get_feature_names_out()

# Map the names onto the columns that actually reached the model. Taking the
# first 15 names would label the coefficients with whichever columns happen to
# come first, not with the ones the selection step kept. Any step located after
# 'preprocessing' that exposes a support mask (scikit-learn selectors do, via
# `get_support()`) is used to filter the names in pipeline order.
_after_preprocessing = False
for _step_name, _step in model_smote.steps[:-1]:
    if _step_name == 'preprocessing':
        _after_preprocessing = True
        continue
    if _after_preprocessing and hasattr(_step, 'get_support'):
        _mask = _step.get_support()
        # Only apply masks that line up with the current set of names.
        if len(_mask) == len(columns_names):
            columns_names = columns_names[_mask]

# Fallback to the previous positional behaviour when no usable mask was found,
# sized by the number of coefficients instead of a hard-coded 15.
if len(columns_names) != len(coef):
    columns_names = columns_names[:len(coef)]

# Pair names with weights, plot them, and render the result as cell output.
coeficientes = get_coeficientes(coef, columns_names)
gr = show_coeficientes(coeficientes)
gr