In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE
from sklearn.neural_network import MLPClassifier

# Cargar datos
df = pd.read_csv('/content/dataset_fc.csv')

# Convertir la columna objetivo a datos numéricos
label_encoder = LabelEncoder()
df['dx_holter_final'] = label_encoder.fit_transform(df['dx_holter_final'])  # 0 para 'arritmia', 1 para 'normal'


X = df.drop(columns=['dx_holter_final']).values
y = df['dx_holter_final'].values

# Imputar valores faltantes
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# Balancear los datos antes de dividir
smote = SMOTE(random_state=42)
X_balanced, y_balanced = smote.fit_resample(X, y)

# Dividir los datos en entrenamiento y prueba (80% entrenamiento, 20% prueba)
X_train, X_test, y_train, y_test = train_test_split(X_balanced, y_balanced, test_size=0.2, random_state=42, stratify=y_balanced)

# Normalizar los datos
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Definir el modelo MLP
mlp = MLPClassifier(max_iter=1000)

# Definir los hiperparámetros a probar
param_grid = {
    #'hidden_layer_sizes': [(50, 50), (100, 100), (150, 150)],
    'hidden_layer_sizes': [(50,), (100,), (150,), (50, 50), (100, 100), (150, 150)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate': ['constant', 'adaptive']
}

# Configurar GridSearchCV
grid_search = GridSearchCV(estimator=mlp, param_grid=param_grid, cv=StratifiedKFold(n_splits=5), verbose=2, n_jobs=-1, scoring='f1')

# Ajustar GridSearchCV a los datos de entrenamiento
grid_search.fit(X_train, y_train)

# Imprimir los mejores hiperparámetros encontrados
print(f'Best parameters: {grid_search.best_params_}')
print(f'Best score: {grid_search.best_score_:.4f}')

# Evaluar el modelo final con los mejores hiperparámetros
best_mlp = grid_search.best_estimator_
y_pred = best_mlp.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f'Final Model Accuracy: {accuracy:.4f}')
print(f'Final Model Recall: {recall:.4f}')
print(f'Final Model F1 Score: {f1:.4f}')
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

from sklearn.metrics import confusion_matrix, recall_score

# Realiza las predicciones
y_pred = best_mlp.predict(X_test)

# Calcular la matriz de confusión
cm = confusion_matrix(y_test, y_pred)
tn, fp, fn, tp = cm.ravel()

# Calcular sensibilidad (recall)
sensitivity = recall_score(y_test, y_pred)
print(f'Sensibilidad (Recall): {sensitivity:.4f}')

# Calcular especificidad
specificity = tn / (tn + fp)
print(f'Especificidad: {specificity:.4f}')


Fitting 5 folds for each of 144 candidates, totalling 720 fits
Best parameters: {'activation': 'relu', 'alpha': 0.01, 'hidden_layer_sizes': (150, 150), 'learning_rate': 'constant', 'solver': 'adam'}
Best score: 0.8354
Final Model Accuracy: 0.8919
Final Model Recall: 0.7838
Final Model F1 Score: 0.8788
              precision    recall  f1-score   support

    ARRITMIA       0.82      1.00      0.90        37
      NORMAL       1.00      0.78      0.88        37

    accuracy                           0.89        74
   macro avg       0.91      0.89      0.89        74
weighted avg       0.91      0.89      0.89        74

Sensibilidad (Recall): 0.7838
Especificidad: 1.0000
