# Regresión Logística Simple

In [1]:
from src.trainning          import create_variables
from src.data_loader        import create_df
from src.evaluation         import show_confusion_matrix, show_metricas, get_coeficientes, show_auc_roc, show_especificidad, show_coeficientes
from src.trainning          import final_pipeline
from src.data_loader        import serialize_models
from src.parser             import YamlParser
from pathlib                import Path


# Project configuration is read from config.yaml in the kernel's current
# working directory, so the notebook must be launched from the folder that
# contains that file.
YAML_FILE = Path.cwd().joinpath("config.yaml")
yaml_parser = YamlParser()
config = yaml_parser.load_yaml(YAML_FILE)

# Modelling dataframe shared by every section of this notebook.
# NOTE(review): presumably 'raw_data' is a dataset key resolved inside
# create_df -- confirm against src.data_loader.
df_model_rl = create_df('raw_data')

# Baseline pipeline: logistic regression with final_pipeline's default options.
lr_model_type = config['params']['model_type']['logistic_regression']
model_pipeline = final_pipeline(model_type=lr_model_type)

In [2]:
# Train/test partitions reused by every model section in this notebook.
X_train, X_test, y_train, y_test = create_variables(df_model_rl)

model_pipeline.fit(X_train, y_train)

# serialize_models returns a status message; print it so the save is
# confirmed in the cell output, as the SMOTEENN section already does.
print(serialize_models(model_pipeline, 'rl_01_model'))

# NOTE(review): `pred` is rebound by the later model sections, so the metric
# and plot cells report on whichever prediction cell ran last. Run the
# notebook top to bottom.
pred = model_pipeline.predict(X_test)

In [3]:
# Text report + accuracy, and specificity, for the current `pred`
# (the baseline model when cells are run in order).
metricas, accuracy_final = show_metricas(y_test, pred)
especificidad = show_especificidad(y_test, pred)

# One print call with a newline separator emits exactly the same text as
# printing each item on its own.
print(
    f"\nAccuracy del Modelo Final Optimizado: {accuracy_final:.4f}",
    f"\nEspecificidad del Modelo Final Optimizado: {especificidad:.4f}",
    "\nReporte de Clasificación del Modelo Final:",
    metricas,
    sep="\n",
)


Accuracy del Modelo Final Optimizado: 0.7472

Especificidad del Modelo Final Optimizado: 0.7613

Reporte de Clasificación del Modelo Final:
              precision    recall  f1-score   support

           0       0.92      0.76      0.83       222
           1       0.35      0.67      0.46        43

    accuracy                           0.75       265
   macro avg       0.64      0.72      0.65       265
weighted avg       0.83      0.75      0.77       265



In [4]:
# Confusion matrix for the current `pred` (baseline model when run in order).
gr = show_confusion_matrix(
    y_test,
    pred,
    'Regresion Logistica Simple',
)
gr  # bare last expression so the notebook renders the returned object

In [5]:
# ROC plot for the baseline pipeline on the held-out split.
# `auc_roc` is captured but not used by any later cell in this notebook.
fig, auc_roc = show_auc_roc(
    model_pipeline,
    X_test,
    y_test,
)
fig  # bare last expression so the notebook renders the returned object

# Regresión Logística con SMOTEENN

In [6]:
# Same logistic-regression configuration as the baseline, with the
# pipeline's resampling option switched on.
smote_model_type = config['params']['model_type']['logistic_regression']
model_smote = final_pipeline(model_type=smote_model_type, use_smote=True)
model_smote.fit(X_train, y_train)

# Persist the fitted pipeline and show the status message it returns.
save_msg = serialize_models(model_smote, 'rl_02_model_smote')
print(save_msg)

# Rebinds `pred`: the following metric/plot cells now describe this model.
pred = model_smote.predict(X_test)

Modelo rl_02_model_smote.pkl guardado con éxito


In [7]:
# Text report + accuracy, and specificity, for the current `pred`
# (the SMOTEENN model when cells are run in order).
metricas, accuracy_final = show_metricas(y_test, pred)
especificidad = show_especificidad(y_test, pred)

# One print call with a newline separator emits exactly the same text as
# printing each item on its own.
print(
    f"\nAccuracy del Modelo Final Optimizado: {accuracy_final:.4f}",
    f"\nEspecificidad del Modelo Final Optimizado: {especificidad:.4f}",
    "\nReporte de Clasificación del Modelo Final:",
    metricas,
    sep="\n",
)


Accuracy del Modelo Final Optimizado: 0.7321

Especificidad del Modelo Final Optimizado: 0.7432

Reporte de Clasificación del Modelo Final:
              precision    recall  f1-score   support

           0       0.92      0.74      0.82       222
           1       0.34      0.67      0.45        43

    accuracy                           0.73       265
   macro avg       0.63      0.71      0.64       265
weighted avg       0.83      0.73      0.76       265



In [8]:
# Confusion matrix for the current `pred` (SMOTEENN model when run in order).
gr = show_confusion_matrix(
    y_test,
    pred,
    'Regresion Logistica con SMOTEENN',
)
gr  # bare last expression so the notebook renders the returned object

In [9]:
# NOTE(review): `model_smote` is rebound further down by the 15-variable
# section; running this cell after that one would chart the reduced model.
smote_steps = model_smote.named_steps

# First row of the fitted estimator's coefficients -- presumably the only
# row for this binary problem (TODO confirm).
coef = smote_steps['model'].coef_[0]
# Output column names of the preprocessing step, paired with `coef` below.
columns_names = smote_steps['preprocessing'].get_feature_names_out()

coeficientes = get_coeficientes(coef, columns_names)
gr = show_coeficientes(coeficientes)
gr  # bare last expression so the notebook renders the returned object

In [10]:
# ROC plot for the SMOTEENN pipeline on the held-out split.
# `auc_roc` is captured but not used by any later cell in this notebook.
fig, auc_roc = show_auc_roc(
    model_smote,
    X_test,
    y_test,
)
fig  # bare last expression so the notebook renders the returned object

# Regresión Logística con SMOTEENN - 15 Variables con mayor coeficiente

In [11]:
# NOTE(review): this rebinds `model_smote`, replacing the full SMOTEENN
# pipeline fitted earlier. Re-running the coefficient or ROC cells of the
# previous section after this cell would silently describe this model instead.
# A distinct name (e.g. `model_smote_15v`) would remove the hazard, but it
# has to be changed together with the ROC cell at the end of this section.
#
# NOTE(review): the saved output of this cell shows the warning
# "Inconsistent values: penalty=l1 with l1_ratio=0.0. penalty is deprecated."
# Presumably it comes from an estimator built inside final_pipeline when
# use_importances=True; the fix belongs in src.trainning -- confirm.
model_smote = final_pipeline(
    model_type=config['params']['model_type']['logistic_regression'],
    use_smote=True,
    use_importances=True,
)

model_smote.fit(X_train, y_train)

# serialize_models returns a status message; print it so the save is
# confirmed in the cell output, as the SMOTEENN section already does.
print(serialize_models(model_smote, 'rl_03_model_smote_15v'))

# Rebinds `pred`: the following metric/plot cells now describe this model.
pred = model_smote.predict(X_test)




Inconsistent values: penalty=l1 with l1_ratio=0.0. penalty is deprecated. Please use l1_ratio only.



In [12]:
# Text report + accuracy, and specificity, for the current `pred`
# (the 15-variable SMOTEENN model when cells are run in order).
metricas, accuracy_final = show_metricas(y_test, pred)
especificidad = show_especificidad(y_test, pred)

# One print call with a newline separator emits exactly the same text as
# printing each item on its own.
print(
    f"\nAccuracy del Modelo Final Optimizado: {accuracy_final:.4f}",
    f"\nEspecificidad del Modelo Final Optimizado: {especificidad:.4f}",
    "\nReporte de Clasificación del Modelo Final:",
    metricas,
    sep="\n",
)


Accuracy del Modelo Final Optimizado: 0.6981

Especificidad del Modelo Final Optimizado: 0.6982

Reporte de Clasificación del Modelo Final:
              precision    recall  f1-score   support

           0       0.92      0.70      0.79       222
           1       0.31      0.70      0.43        43

    accuracy                           0.70       265
   macro avg       0.62      0.70      0.61       265
weighted avg       0.82      0.70      0.74       265



In [13]:
# Confusion matrix for the current `pred` (15-variable model when run in order).
gr = show_confusion_matrix(
    y_test,
    pred,
    'Regresion Logistica - SMOTEENN - 15 Variables',
)
gr  # bare last expression so the notebook renders the returned object

In [14]:
# ROC plot for the pipeline currently bound to `model_smote`; when cells are
# run in order this is the 15-variable model fitted in this section.
# `auc_roc` is captured but not used by any later cell in this notebook.
fig, auc_roc = show_auc_roc(
    model_smote,
    X_test,
    y_test,
)
fig  # bare last expression so the notebook renders the returned object