In [24]:

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler
import joblib

In [25]:

data = pd.read_csv('data/stroke_woe_smote.csv')

In [26]:

bool_columns = data.select_dtypes(include=['bool']).columns
data[bool_columns] = data[bool_columns].astype(int)

data = data.apply(pd.to_numeric, errors='coerce')
data = data.dropna()

In [27]:
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values
y = y.flatten()

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [28]:

best_params = {'n_layers': 5, 'n_units': 834, 'activation': 'relu', 'dropout_rate': 0.006061847852983231, 'batch_size': 74, 'epochs': 246}

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)


model = keras.Sequential()
model.add(keras.layers.Input(shape=(X_train.shape[1],)))

for _ in range(best_params['n_layers']):
    model.add(keras.layers.Dense(best_params['n_units'], activation=best_params['activation']))
    model.add(keras.layers.Dropout(best_params['dropout_rate']))

# Capa de salida
model.add(keras.layers.Dense(1, activation='sigmoid'))


model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


model.fit(X_train, y_train, epochs=best_params['epochs'], batch_size=best_params['batch_size'], verbose=1)

Epoch 1/246
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 78ms/step - accuracy: 0.7176 - loss: 0.5333
Epoch 2/246
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 69ms/step - accuracy: 0.7693 - loss: 0.4728
Epoch 3/246
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 77ms/step - accuracy: 0.7778 - loss: 0.4594
Epoch 4/246
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 81ms/step - accuracy: 0.7916 - loss: 0.4295
Epoch 5/246
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 76ms/step - accuracy: 0.8000 - loss: 0.4065
Epoch 6/246
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 97ms/step - accuracy: 0.8074 - loss: 0.4007
Epoch 7/246
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 89ms/step - accuracy: 0.8011 - loss: 0.3994
Epoch 8/246
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 82ms/step - accuracy: 0.8119 - loss: 0.3880
Epoch 9/246
[1m90/90[0m [32m━━━━━━

<keras.src.callbacks.history.History at 0x29303ab7ec0>

In [29]:

model.save('model/nn_stroke.keras')
joblib.dump(scaler, 'model/nn_scaler.joblib')


['model/nn_scaler.joblib']

In [30]:
# Evaluar el modelo
y_pred = (model.predict(X_test) > 0.5).astype("int32")  # Umbral de 0.5 para clasificar como 1
accuracy = accuracy_score(y_test, y_pred)
print(f"Precisión en el conjunto de prueba: {accuracy:.4f}")

# Mostrar el reporte de clasificación
print("Reporte de clasificación:")
print(classification_report(y_test, y_pred))

# Mostrar la matriz de confusión
print("Matriz de confusión:")
print(confusion_matrix(y_test, y_pred))

[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step
Precisión en el conjunto de prueba: 0.8933
Reporte de clasificación:
              precision    recall  f1-score   support

           0       0.91      0.87      0.89      1407
           1       0.88      0.92      0.90      1433

    accuracy                           0.89      2840
   macro avg       0.89      0.89      0.89      2840
weighted avg       0.89      0.89      0.89      2840

Matriz de confusión:
[[1222  185]
 [ 118 1315]]
