In [None]:
import pandas as pd
import time
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_predict, StratifiedKFold

# --- Load the Pima Indian Diabetes data ---
print("Cargando datos Pima Indian Diabetes...")
csv_path = "../datasets/pima_indian_diabetes_dataset/cleaned_dataset.csv"
df = pd.read_csv(csv_path)

# The label is assumed to be the last column; every other column is a feature.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

print(f"Datos cargados: {X.shape[0]} muestras, {X.shape[1]} características.\n")

# Accumulates one summary dict (model name, AUC, elapsed time) per model.
results = []

# Single shuffled, seeded 5-fold stratified splitter shared by every model
# evaluated below.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# --- SVM ---
# Evaluate an RBF-kernel SVM with 5-fold cross-validated probabilities.
print("Evaluando modelo: SVM...")
# With probability=True, SVC shuffles the data for its internal probability
# calibration; seeding it makes the reported AUC reproducible, consistent with
# the seeded Random Forest and AdaBoost models in this script.
svm = SVC(probability=True, kernel='rbf', gamma='scale', random_state=42)
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the system clock and can jump or be coarse.
start = time.perf_counter()
# Out-of-fold predict_proba for every sample; column 1 is the second class.
y_svm_pred = cross_val_predict(svm, X, y, cv=cv, method='predict_proba', n_jobs=-1)[:, 1]
svm_time = time.perf_counter() - start
# AUC over the pooled out-of-fold probabilities.
auc_svm = roc_auc_score(y, y_svm_pred)
print(f"Finalizado SVM -> AUC: {auc_svm:.4f} | Tiempo: {svm_time:.2f} segundos\n")
results.append({"Model": "SVM", "AUC (5-fold)": auc_svm, "Time (s)": svm_time})

# --- Random Forest ---
# Evaluate a seeded 100-tree Random Forest with 5-fold cross-validated
# probabilities.
print("Evaluando modelo: Random Forest...")
rf = RandomForestClassifier(n_estimators=100, random_state=42)
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the system clock and can jump or be coarse.
start = time.perf_counter()
# Out-of-fold predict_proba for every sample; column 1 is the second class.
y_rf_pred = cross_val_predict(rf, X, y, cv=cv, method='predict_proba', n_jobs=-1)[:, 1]
rf_time = time.perf_counter() - start
# AUC over the pooled out-of-fold probabilities.
auc_rf = roc_auc_score(y, y_rf_pred)
print(f"Finalizado Random Forest -> AUC: {auc_rf:.4f} | Tiempo: {rf_time:.2f} segundos\n")
results.append({"Model": "Random Forest", "AUC (5-fold)": auc_rf, "Time (s)": rf_time})

# --- AdaBoost ---
# Evaluate a seeded 50-estimator AdaBoost with 5-fold cross-validated
# probabilities.
print("Evaluando modelo: AdaBoost...")
ada = AdaBoostClassifier(n_estimators=50, random_state=42)
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the system clock and can jump or be coarse.
start = time.perf_counter()
# Out-of-fold predict_proba for every sample; column 1 is the second class.
y_ada_pred = cross_val_predict(ada, X, y, cv=cv, method='predict_proba', n_jobs=-1)[:, 1]
ada_time = time.perf_counter() - start
# AUC over the pooled out-of-fold probabilities.
auc_ada = roc_auc_score(y, y_ada_pred)
print(f"Finalizado AdaBoost -> AUC: {auc_ada:.4f} | Tiempo: {ada_time:.2f} segundos\n")
results.append({"Model": "AdaBoost", "AUC (5-fold)": auc_ada, "Time (s)": ada_time})

# --- Stacking ---
# Evaluate a stacked ensemble (SVM + Random Forest + AdaBoost feeding a
# logistic-regression meta-model) with 5-fold cross-validated probabilities.
print("Evaluando modelo: Stacking (SVM + RF + AdaBoost)...")
base_learners = [
    # Seeded like the other base learners: with probability=True, SVC shuffles
    # the data for its probability calibration, so an unseeded SVC would make
    # the stacked AUC vary between runs.
    ('svm', SVC(probability=True, kernel='rbf', gamma='scale', random_state=42)),
    ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('ada', AdaBoostClassifier(n_estimators=50, random_state=42)),
]
meta_model = LogisticRegression(max_iter=1000)
# cv=5 is the stacker's own internal cross-validation, separate from the outer
# `cv` splitter passed to cross_val_predict below.
stack = StackingClassifier(estimators=base_learners, final_estimator=meta_model, cv=5, n_jobs=-1)

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the system clock and can jump or be coarse.
start = time.perf_counter()
# Out-of-fold predict_proba for every sample; column 1 is the second class.
y_stack_pred = cross_val_predict(stack, X, y, cv=cv, method='predict_proba', n_jobs=-1)[:, 1]
stack_time = time.perf_counter() - start
# AUC over the pooled out-of-fold probabilities.
auc_stack = roc_auc_score(y, y_stack_pred)
print(f"Finalizado Stacking -> AUC: {auc_stack:.4f} | Tiempo: {stack_time:.2f} segundos\n")
results.append({"Model": "Stacking", "AUC (5-fold)": auc_stack, "Time (s)": stack_time})

# --- Show final results ---
# Build the summary table and round both numeric columns in a single call
# (AUC to 4 decimals, elapsed seconds to 2); the "Model" column is left as is.
df_results = pd.DataFrame(results).round({"AUC (5-fold)": 4, "Time (s)": 2})

print("=== RESUMEN FINAL DE EVALUACIÓN (PIMA, 5-FOLD CV) ===")
print(df_results.to_string(index=False))


Cargando datos Pima Indian Diabetes...
Datos cargados: 392 muestras, 8 características.

Evaluando modelo: SVM...
Finalizado SVM -> AUC: 0.8148 | Tiempo: 0.03 segundos

Evaluando modelo: Random Forest...
Finalizado Random Forest -> AUC: 0.8449 | Tiempo: 0.13 segundos

Evaluando modelo: AdaBoost...
Finalizado AdaBoost -> AUC: 0.8253 | Tiempo: 0.07 segundos

Evaluando modelo: Stacking (SVM + RF + AdaBoost)...
Finalizado Stacking -> AUC: 0.7018 | Tiempo: 1.14 segundos

=== RESUMEN FINAL DE EVALUACIÓN (PIMA, 5-FOLD CV) ===
        Model  AUC (5-fold)  Time (s)
          SVM        0.8148      0.03
Random Forest        0.8449      0.13
     AdaBoost        0.8253      0.07
     Stacking        0.7018      1.14
