In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from tensorflow import keras

# --- 1. Cargar datos ---
df = pd.read_csv("../data/heart.csv")  # ruta al CSV
X = df.drop("target", axis=1)
y = df["target"]

# --- 2. Columnas ---
num_cols = ['age','trestbps','chol','thalach','oldpeak']
cat_cols = [c for c in X.columns if c not in num_cols]

# --- 3. Preprocesamiento ---
num_pipe = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler())
])
cat_pipe = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore"))
])

preprocessor = ColumnTransformer([
    ("num", num_pipe, num_cols),
    ("cat", cat_pipe, cat_cols)
])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# --- 4. Modelo clásico: Random Forest ---
rf = Pipeline([
    ("prep", preprocessor),
    ("clf", RandomForestClassifier(n_estimators=200, random_state=42))
])
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
print("RandomForest Accuracy:", accuracy_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))

# --- 5. Red Neuronal (Keras) ---
from sklearn.preprocessing import OneHotEncoder
# Necesitamos arrays ya preprocesados
X_train_proc = preprocessor.fit_transform(X_train)
X_test_proc = preprocessor.transform(X_test)

model = keras.Sequential([
    keras.layers.Input(shape=(X_train_proc.shape[1],)),
    keras.layers.Dense(16, activation='relu'),
    keras.layers.Dense(8, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train_proc, y_train, epochs=50, batch_size=16, verbose=0)
print("NN Accuracy:", model.evaluate(X_test_proc, y_test, verbose=0)[1])

# --- 6. Algoritmo adicional: KNN ---
knn = Pipeline([
    ("prep", preprocessor),
    ("clf", KNeighborsClassifier(n_neighbors=5))
])
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)
print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))
print("F1:", f1_score(y_test, y_pred_knn))


RandomForest Accuracy: 0.9902597402597403
F1: 0.9904153354632588
NN Accuracy: 0.948051929473877
KNN Accuracy: 0.8538961038961039
F1: 0.8562300319488818
