# 📊 Clasificación AACC vs no AACC con Machine Learning
Este notebook compara distintos clasificadores usando datos EEG.
Incluye preprocesamiento, reducción con PCA y validación cruzada.

---

In [7]:
# 🔧 Librerías y configuración
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

from xgboost import XGBClassifier

In [8]:
# 📁 Carga de datos
X = pd.read_csv("C:/Users/Eloy/OneDrive - Universidad de Castilla-La Mancha (1)/Tesis_EEG/proyecto_eeg/df_EEG/PRE/X_gamma.csv", index_col=0)
df_meta = pd.read_csv("C:/Users/Eloy/OneDrive - Universidad de Castilla-La Mancha (1)/Tesis_EEG/proyecto_eeg/df_EEG/PRE/meta.csv", index_col=0)

# Filtrar IDs comunes
ids_validos = X.index.intersection(df_meta.index)
X = X.loc[ids_validos]
y = df_meta.loc[ids_validos]["y_recommended"]

In [9]:

# ⚙️ Preprocesamiento
X = X.select_dtypes(include='number')
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

pca = PCA(n_components=0.9, random_state=42)
X_pca = pca.fit_transform(X_scaled)
print(f"➡️ PCA redujo de {X.shape[1]} a {X_pca.shape[1]} dimensiones")

# Dividir en train y test
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.3, stratify=y, random_state=42)


➡️ PCA redujo de 576 a 26 dimensiones


In [10]:
# 🧠 Definir clasificadores y parámetros
modelos = {
    "SVM": (
        SVC(class_weight='balanced', random_state=42),
        {
            "C": [0.1, 1, 10],
            "kernel": ["linear", "rbf"],
            "gamma": ["scale", "auto"]
        }
    ),
    "Random Forest": (
        RandomForestClassifier(class_weight='balanced', random_state=42),
        {
            "n_estimators": [100, 200],
            "max_depth": [None, 10, 20]
        }
    ),
    "XGBoost": (
        XGBClassifier(use_label_encoder=False, eval_metric="logloss"),
        {
            "n_estimators": [50, 100, 200],
            "max_depth": [3, 5, 7],
            "learning_rate": [0.01, 0.1, 0.2]
        }
    ),
    "Logistic Regression": (
        LogisticRegression(class_weight='balanced', max_iter=1000),
        {
            "C": [0.1, 1, 10],
            "penalty": ["l2"]
        }
    ),
    "KNN": (
        KNeighborsClassifier(),
        {
            "n_neighbors": [3, 5, 7]
        }
    ),
    "Decision Tree": (
        DecisionTreeClassifier(class_weight='balanced', random_state=42),
        {
            "max_depth": [None, 10, 20],
            "min_samples_split": [2, 5, 10]
        }
    ),
    "Naive Bayes": (
        GaussianNB(),
        {}
    ),
    "Gradient Boosting": (
        GradientBoostingClassifier(random_state=42),
        {
            "n_estimators": [100, 200],
            "learning_rate": [0.05, 0.1],
            "max_depth": [3, 5]
        }
    ),
    "AdaBoost": (
        AdaBoostClassifier(random_state=42),
        {
            "n_estimators": [50, 100],
            "learning_rate": [0.5, 1.0]
        }
    )
}

In [11]:
# 🔎 Evaluación de modelos
resultados = []
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for nombre, (modelo, param_grid) in modelos.items():
    print(f"🔍 GridSearchCV para: {nombre}")
    grid = GridSearchCV(modelo, param_grid, scoring='f1_macro', cv=cv, n_jobs=-1)
    grid.fit(X_train, y_train)
    best = grid.best_estimator_
    scores_f1 = cross_val_score(best, X_pca, y, cv=cv, scoring='f1_macro')
    scores_acc = cross_val_score(best, X_pca, y, cv=cv, scoring='accuracy')
    resultados.append({
        'Modelo': nombre,
        'Mejor Parámetro': grid.best_params_,
        'F1 Macro (CV)': scores_f1.mean(),
        'Accuracy (CV)': scores_acc.mean()
    })

🔍 GridSearchCV para: SVM
🔍 GridSearchCV para: Random Forest
🔍 GridSearchCV para: XGBoost


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


🔍 GridSearchCV para: Logistic Regression
🔍 GridSearchCV para: KNN
🔍 GridSearchCV para: Decision Tree
🔍 GridSearchCV para: Naive Bayes
🔍 GridSearchCV para: Gradient Boosting
🔍 GridSearchCV para: AdaBoost


In [12]:
# 📈 Mostrar resultados ordenados por F1 Macro
df_resultados = pd.DataFrame(resultados).sort_values(by='F1 Macro (CV)', ascending=False)
df_resultados.reset_index(drop=True, inplace=True)
df_resultados

Unnamed: 0,Modelo,Mejor Parámetro,F1 Macro (CV),Accuracy (CV)
0,SVM,"{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}",0.620886,0.628571
1,Random Forest,"{'max_depth': None, 'n_estimators': 100}",0.553664,0.555844
2,Gradient Boosting,"{'learning_rate': 0.05, 'max_depth': 5, 'n_est...",0.551953,0.558009
3,Logistic Regression,"{'C': 0.1, 'penalty': 'l2'}",0.54649,0.555844
4,AdaBoost,"{'learning_rate': 0.5, 'n_estimators': 50}",0.535891,0.537229
5,XGBoost,"{'learning_rate': 0.01, 'max_depth': 3, 'n_est...",0.52675,0.528139
6,Naive Bayes,{},0.525678,0.563203
7,Decision Tree,"{'max_depth': None, 'min_samples_split': 10}",0.521246,0.528571
8,KNN,{'n_neighbors': 7},0.508935,0.527706
