In [12]:

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler
import joblib

In [13]:

data = pd.read_csv('/content/drive/MyDrive/ds/cleaned_dataset4.csv')

Mounted at /content/drive


In [14]:

bool_columns = data.select_dtypes(include=['bool']).columns
data[bool_columns] = data[bool_columns].astype(int)

data = data.apply(pd.to_numeric, errors='coerce')
data = data.dropna()

In [15]:
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values
y = y.flatten()

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [16]:

best_params = {
    'n_layers': 3,
    'n_units': 120,
    'activation': 'relu',
    'dropout_rate': 0.0373,
    'batch_size': 45,
    'epochs': 50
}

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)


model = keras.Sequential()
model.add(keras.layers.Input(shape=(X_train.shape[1],)))

for _ in range(best_params['n_layers']):
    model.add(keras.layers.Dense(best_params['n_units'], activation=best_params['activation']))
    model.add(keras.layers.Dropout(best_params['dropout_rate']))

# Capa de salida
model.add(keras.layers.Dense(1, activation='sigmoid'))


model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


model.fit(X_train, y_train, epochs=best_params['epochs'], batch_size=best_params['batch_size'], verbose=1)

Epoch 1/48
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/step - accuracy: 0.6782 - loss: 0.5953
Epoch 2/48
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.7993 - loss: 0.4393
Epoch 3/48
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.8315 - loss: 0.3841
Epoch 4/48
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8428 - loss: 0.3562
Epoch 5/48
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8450 - loss: 0.3585
Epoch 6/48
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8671 - loss: 0.3197
Epoch 7/48
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8688 - loss: 0.3179
Epoch 8/48
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8755 - loss: 0.3004
Epoch 9/48
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x7f72867754b0>

In [None]:
model.save('nn_stroke.keras')
joblib.dump(scaler, 'nn_scaler.joblib')

In [17]:
# Evaluar el modelo
y_pred = (model.predict(X_test) > 0.5).astype("int32")  # Umbral de 0.5 para clasificar como 1
accuracy = accuracy_score(y_test, y_pred)
print(f"Precisión en el conjunto de prueba: {accuracy:.4f}")

# Mostrar el reporte de clasificación
print("Reporte de clasificación:")
print(classification_report(y_test, y_pred))

# Mostrar la matriz de confusión
print("Matriz de confusión:")
print(confusion_matrix(y_test, y_pred))

[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Precisión en el conjunto de prueba: 0.9129
Reporte de clasificación:
              precision    recall  f1-score   support

         0.0       0.95      0.87      0.91       946
         1.0       0.88      0.96      0.92       948

    accuracy                           0.91      1894
   macro avg       0.92      0.91      0.91      1894
weighted avg       0.92      0.91      0.91      1894

Matriz de confusión:
[[822 124]
 [ 41 907]]
