In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LogisticRegression

# Cargar datos
df = pd.read_csv('/content/dataset_fc.csv')  #  archivo

# Convertir la columna objetivo a datos numéricos
label_encoder = LabelEncoder()
df['dx_holter_final'] = label_encoder.fit_transform(df['dx_holter_final'])  # 0 para 'arritmia', 1 para 'normal'


X = df.drop(columns=['dx_holter_final']).values
y = df['dx_holter_final'].values

# Imputar valores faltantes
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# Balancear los datos antes de dividir
smote = SMOTE(random_state=42)
X_balanced, y_balanced = smote.fit_resample(X, y)

# Dividir los datos en entrenamiento y prueba (80% entrenamiento, 20% prueba)
X_train, X_test, y_train, y_test = train_test_split(X_balanced, y_balanced, test_size=0.2, random_state=42, stratify=y_balanced)

# Normalizar los datos
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Definir el modelo de regresión logística
log_reg = LogisticRegression()

# Definir los hiperparámetros a probar
param_grid = {
    'C': [0.01, 0.1, 1, 10, 100],
    'penalty': ['l1', 'l2', 'elasticnet', 'none'],
    'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
    'max_iter': [100, 200, 300]
}

# Configurar GridSearchCV
grid_search = GridSearchCV(estimator=log_reg, param_grid=param_grid, cv=StratifiedKFold(n_splits=5), verbose=2, n_jobs=-1, scoring='f1')

# Ajustar GridSearchCV a los datos de entrenamiento
grid_search.fit(X_train, y_train)

# Imprimir los mejores hiperparámetros encontrados
print(f'Best parameters: {grid_search.best_params_}')
print(f'Best score: {grid_search.best_score_:.4f}')

# Evaluar el modelo final con los mejores hiperparámetros
best_log_reg = grid_search.best_estimator_
y_pred = best_log_reg.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f'Final Model Accuracy: {accuracy:.4f}')
print(f'Final Model Recall: {recall:.4f}')
print(f'Final Model F1 Score: {f1:.4f}')
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

from sklearn.metrics import confusion_matrix, recall_score

# Realiza las predicciones
y_pred = best_log_reg.predict(X_test)

# Calcular la matriz de confusión
cm = confusion_matrix(y_test, y_pred)
tn, fp, fn, tp = cm.ravel()

# Calcular sensibilidad (recall)
sensitivity = recall_score(y_test, y_pred)
print(f'Sensibilidad (Recall): {sensitivity:.4f}')

# Calcular especificidad
specificity = tn / (tn + fp)
print(f'Especificidad: {specificity:.4f}')


Fitting 5 folds for each of 300 candidates, totalling 1500 fits
Best parameters: {'C': 10, 'max_iter': 100, 'penalty': 'l1', 'solver': 'saga'}
Best score: 0.7144
Final Model Accuracy: 0.6486
Final Model Recall: 0.5676
Final Model F1 Score: 0.6176
              precision    recall  f1-score   support

    ARRITMIA       0.63      0.73      0.68        37
      NORMAL       0.68      0.57      0.62        37

    accuracy                           0.65        74
   macro avg       0.65      0.65      0.65        74
weighted avg       0.65      0.65      0.65        74

Sensibilidad (Recall): 0.5676
Especificidad: 0.7297


675 fits failed out of a total of 1500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
75 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py", line 1162, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py", line 54, in _check_solver
    raise ValueError(
ValueError: Solver newton-cg supports only 'l2' or 'none' penalties, got l1 penalty.

-------------------------------