### HYPERPARAMETER TUNING

In [3]:
import kagglehub
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
import seaborn as sns

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
path = kagglehub.dataset_download("muratkokludataset/dry-bean-dataset")
archivos = glob.glob(os.path.join(path, "**", "*.xlsx"), recursive=True)

archivo_datos = archivos[0]
df = pd.read_excel(archivo_datos)

display(df.head()) 

df.info() 

duplicate_count = df.duplicated().sum()
print(f"Number of duplicated rows: {duplicate_count}")



Unnamed: 0,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRation,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4,Class
0,28395,610.291,208.178117,173.888747,1.197191,0.549812,28715,190.141097,0.763923,0.988856,0.958027,0.913358,0.007332,0.003147,0.834222,0.998724,SEKER
1,28734,638.018,200.524796,182.734419,1.097356,0.411785,29172,191.27275,0.783968,0.984986,0.887034,0.953861,0.006979,0.003564,0.909851,0.99843,SEKER
2,29380,624.11,212.82613,175.931143,1.209713,0.562727,29690,193.410904,0.778113,0.989559,0.947849,0.908774,0.007244,0.003048,0.825871,0.999066,SEKER
3,30008,645.884,210.557999,182.516516,1.153638,0.498616,30724,195.467062,0.782681,0.976696,0.903936,0.928329,0.007017,0.003215,0.861794,0.994199,SEKER
4,30140,620.134,201.847882,190.279279,1.060798,0.33368,30417,195.896503,0.773098,0.990893,0.984877,0.970516,0.006697,0.003665,0.9419,0.999166,SEKER


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13611 entries, 0 to 13610
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Area             13611 non-null  int64  
 1   Perimeter        13611 non-null  float64
 2   MajorAxisLength  13611 non-null  float64
 3   MinorAxisLength  13611 non-null  float64
 4   AspectRation     13611 non-null  float64
 5   Eccentricity     13611 non-null  float64
 6   ConvexArea       13611 non-null  int64  
 7   EquivDiameter    13611 non-null  float64
 8   Extent           13611 non-null  float64
 9   Solidity         13611 non-null  float64
 10  roundness        13611 non-null  float64
 11  Compactness      13611 non-null  float64
 12  ShapeFactor1     13611 non-null  float64
 13  ShapeFactor2     13611 non-null  float64
 14  ShapeFactor3     13611 non-null  float64
 15  ShapeFactor4     13611 non-null  float64
 16  Class            13611 non-null  object 
dtypes: float64(1

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 1. Eliminar filas duplicadas (si las hay)
df = df.drop_duplicates()

# 2. Separar características (X) y lo que queremos predecir (y)
X = df.drop('Class', axis=1)
y = df['Class']

# 3. Dividir en Entrenamiento (80%) y Prueba (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. ESCALAR LOS DATOS (¡Súper importante aquí!)
# El área del frijol puede ser 100,000, pero su redondez es 0.8. 
# Si no escalamos esto, el modelo SVC se confundirá y tardará horas.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Datos preparados y escalados. Tamaño de entrenamiento:", X_train_scaled.shape)

Datos preparados y escalados. Tamaño de entrenamiento: (10834, 16)


In [9]:
import optuna
import sklearn.ensemble
import sklearn.model_selection
import sklearn.svm

def objective(trial):
    # Optuna decide qué modelo probar
    classifier_name = trial.suggest_categorical("classifier", ["SVC", "RandomForest"])
    
    if classifier_name == "SVC":
        svc_c = trial.suggest_float("svc_c", 1e-4, 1e4, log=True)
        svc_gamma = trial.suggest_categorical("svc_gamma", ["scale", "auto"])
        classifier_obj = sklearn.svm.SVC(C=svc_c, gamma=svc_gamma, random_state=42)
        
    else:
        rf_max_depth = trial.suggest_int("rf_max_depth", 2, 32, log=True)
        rf_n_estimators = trial.suggest_int("rf_n_estimators", 10, 200)
        classifier_obj = sklearn.ensemble.RandomForestClassifier(
            max_depth=rf_max_depth, n_estimators=rf_n_estimators, random_state=42
        )

    # Validación cruzada de 3 pliegues (k-fold)
    score = sklearn.model_selection.cross_val_score(
        classifier_obj, X_train_scaled, y_train, n_jobs=-1, cv=3
    )
    return score.mean()

# Ejecutamos la búsqueda
study = optuna.create_study(direction="maximize")
print("Iniciando Optuna... Esto tomará unos minutos.")
study.optimize(objective, n_trials=300)

print("\n¡Búsqueda terminada!")
print("El mejor modelo y sus parámetros son:", study.best_params)
print("Exactitud (Accuracy) lograda en entrenamiento:", study.best_value)

[32m[I 2026-03-01 16:45:37,781][0m A new study created in memory with name: no-name-3322d7f7-6e29-4b19-b96e-9b971b689f20[0m


Iniciando Optuna... Esto tomará unos minutos.


[32m[I 2026-03-01 16:45:39,490][0m Trial 0 finished with value: 0.9228356272449579 and parameters: {'classifier': 'SVC', 'svc_c': 675.2302170287952, 'svc_gamma': 'auto'}. Best is trial 0 with value: 0.9228356272449579.[0m
[32m[I 2026-03-01 16:45:42,145][0m Trial 1 finished with value: 0.8856375493894574 and parameters: {'classifier': 'RandomForest', 'rf_max_depth': 4, 'rf_n_estimators': 175}. Best is trial 0 with value: 0.9228356272449579.[0m
[32m[I 2026-03-01 16:45:43,694][0m Trial 2 finished with value: 0.9096361666737715 and parameters: {'classifier': 'RandomForest', 'rf_max_depth': 7, 'rf_n_estimators': 62}. Best is trial 0 with value: 0.9228356272449579.[0m
[32m[I 2026-03-01 16:45:47,404][0m Trial 3 finished with value: 0.9223740234685472 and parameters: {'classifier': 'RandomForest', 'rf_max_depth': 18, 'rf_n_estimators': 123}. Best is trial 0 with value: 0.9228356272449579.[0m
[32m[I 2026-03-01 16:45:48,658][0m Trial 4 finished with value: 0.9254201688188924 and pa


¡Búsqueda terminada!
El mejor modelo y sus parámetros son: {'classifier': 'SVC', 'svc_c': 4.377114468921454, 'svc_gamma': 'auto'}
Exactitud (Accuracy) lograda en entrenamiento: 0.9303120392459814


In [10]:
from sklearn.metrics import classification_report

best_params = study.best_params

# Construimos el modelo ganador
if best_params['classifier'] == 'SVC':
    modelo_final = sklearn.svm.SVC(
        C=best_params['svc_c'], 
        gamma=best_params['svc_gamma'], 
        random_state=42
    )
else:
    modelo_final = sklearn.ensemble.RandomForestClassifier(
        max_depth=best_params['rf_max_depth'], 
        n_estimators=best_params['rf_n_estimators'], 
        random_state=42
    )

# Lo entrenamos usando TODOS los datos de entrenamiento
modelo_final.fit(X_train_scaled, y_train)

# EXAMEN FINAL: Predecimos sobre el Test Set que el modelo nunca ha visto
y_pred = modelo_final.predict(X_test_scaled)
test_accuracy = modelo_final.score(X_test_scaled, y_test)

print(f"Exactitud Final en el Test Set: {test_accuracy * 100:.2f}%\n")
print("Reporte detallado por clase de frijol:")
print(classification_report(y_test, y_pred))

Exactitud Final en el Test Set: 93.36%

Reporte detallado por clase de frijol:
              precision    recall  f1-score   support

    BARBUNYA       0.95      0.93      0.94       259
      BOMBAY       1.00      1.00      1.00       114
        CALI       0.93      0.96      0.95       305
    DERMASON       0.93      0.93      0.93       707
       HOROZ       0.98      0.95      0.96       376
       SEKER       0.95      0.95      0.95       414
        SIRA       0.88      0.89      0.89       534

    accuracy                           0.93      2709
   macro avg       0.94      0.94      0.94      2709
weighted avg       0.93      0.93      0.93      2709



### ENSEMBLE METHODS

In [11]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, BaggingClassifier, AdaBoostClassifier, StackingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

# ==========================================
# 1. VOTING CLASSIFIER (Soft Voting)
# ==========================================
print("1. Entrenando Voting Classifier...")
log_clf = LogisticRegression(max_iter=1000, random_state=42)
rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)
svm_clf = SVC(probability=True, random_state=42) # probability=True es necesario para voting='soft'

voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
    voting='soft'
)
voting_clf.fit(X_train_scaled, y_train)
print(f"-> Accuracy Voting: {accuracy_score(y_test, voting_clf.predict(X_test_scaled)):.4f}\n")

# ==========================================
# 2. BAGGING CLASSIFIER
# ==========================================
print("2. Entrenando Bagging Classifier...")
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(random_state=42),
    n_estimators=100,
    max_samples=1.0,
    bootstrap=True,
    n_jobs=-1,
    random_state=42
)
bag_clf.fit(X_train_scaled, y_train)
print(f"-> Accuracy Bagging: {accuracy_score(y_test, bag_clf.predict(X_test_scaled)):.4f}\n")

# ==========================================
# 3. ADABOOST CLASSIFIER
# ==========================================
print("3. Entrenando AdaBoost...")
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=3, random_state=42),
    n_estimators=100,
    learning_rate=0.5,
    random_state=42
)
ada_clf.fit(X_train_scaled, y_train)
print(f"-> Accuracy AdaBoost: {accuracy_score(y_test, ada_clf.predict(X_test_scaled)):.4f}\n")

# ==========================================
# 4. STACKING CLASSIFIER
# ==========================================
print("4. Entrenando Stacking Classifier...")
base_estimators = [
    ('lr', LogisticRegression(max_iter=1000, random_state=42)),
    ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('svc', SVC(probability=True, random_state=42))
]
final_blender = LogisticRegression(max_iter=1000, random_state=42)

stacking_clf = StackingClassifier(
    estimators=base_estimators,
    final_estimator=final_blender,
    cv=3,
    n_jobs=-1
)
stacking_clf.fit(X_train_scaled, y_train)
print(f"-> Accuracy Stacking: {accuracy_score(y_test, stacking_clf.predict(X_test_scaled)):.4f}\n")

# ==========================================
# 5. XGBOOST (Gradient Boosting)
# ==========================================
print("5. Entrenando XGBoost...")
# XGBoost requiere que las clases de texto (SEKER, BOMBAY, etc.) sean números (0, 1, 2...)
# Usamos LabelEncoder para transformarlas temporalmente
le = LabelEncoder()
y_train_num = le.fit_transform(y_train)
y_test_num = le.transform(y_test)

xgb_clf = xgb.XGBClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=6,
    random_state=42,
    use_label_encoder=False,
    eval_metric='mlogloss'
)
xgb_clf.fit(X_train_scaled, y_train_num)
print(f"-> Accuracy XGBoost: {accuracy_score(y_test_num, xgb_clf.predict(X_test_scaled)):.4f}\n")

1. Entrenando Voting Classifier...
-> Accuracy Voting: 0.9262

2. Entrenando Bagging Classifier...
-> Accuracy Bagging: 0.9206

3. Entrenando AdaBoost...
-> Accuracy AdaBoost: 0.9140

4. Entrenando Stacking Classifier...
-> Accuracy Stacking: 0.9265

5. Entrenando XGBoost...


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


-> Accuracy XGBoost: 0.9269



### XGBoost + Optuna

In [12]:
import optuna
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, accuracy_score

# 1. Transformar etiquetas de texto a números (XGBoost lo exige)
le = LabelEncoder()
y_train_num = le.fit_transform(y_train)
y_test_num = le.transform(y_test)

# 2. Definimos la función de Optuna exclusiva para XGBoost
def objective_xgb(trial):
    # Rango de hiperparámetros que Optuna va a explorar
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'random_state': 42,
        'eval_metric': 'mlogloss' # Para evitar warnings en multiclase
    }
    
    xgb_clf = xgb.XGBClassifier(**param)
    
    # Evaluación con Cross-Validation de 3 pliegues
    score = cross_val_score(xgb_clf, X_train_scaled, y_train_num, n_jobs=-1, cv=3)
    return score.mean()

# 3. Iniciamos la búsqueda inteligente
study_xgb = optuna.create_study(direction="maximize")
print("Iniciando la búsqueda inteligente de Optuna para XGBoost (30 pruebas)...")
study_xgb.optimize(objective_xgb, n_trials=30)

# 4. Construimos y entrenamos el modelo definitivo con los hiperparámetros ganadores
print("\n¡Búsqueda terminada! Entrenando el mejor modelo...")
best_params = study_xgb.best_params
best_params['random_state'] = 42
best_params['eval_metric'] = 'mlogloss'

final_xgb = xgb.XGBClassifier(**best_params)
final_xgb.fit(X_train_scaled, y_train_num)

# 5. Examen final con el Test Set
y_pred_num = final_xgb.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test_num, y_pred_num)

print(f"\n==========================================")
print(f"Mejores parámetros XGBoost: {best_params}")
print(f"Exactitud (Accuracy) en el Test Set: {test_accuracy * 100:.2f}%")
print(f"==========================================\n")

# Transformamos los números de vuelta a nombres de frijoles para el reporte
y_pred_nombres = le.inverse_transform(y_pred_num)
print("Reporte detallado por clase de frijol:")
print(classification_report(y_test, y_pred_nombres))

[32m[I 2026-03-01 16:54:46,444][0m A new study created in memory with name: no-name-f75fc49c-9ba1-4396-9c97-e6da4f6ab88f[0m


Iniciando la búsqueda inteligente de Optuna para XGBoost (30 pruebas)...


[32m[I 2026-03-01 16:54:49,071][0m Trial 0 finished with value: 0.9264355080079643 and parameters: {'n_estimators': 190, 'learning_rate': 0.09255410164635217, 'max_depth': 5, 'subsample': 0.7947968429585397, 'colsample_bytree': 0.8478144279946354}. Best is trial 0 with value: 0.9264355080079643.[0m
[32m[I 2026-03-01 16:54:55,325][0m Trial 1 finished with value: 0.9268045967987361 and parameters: {'n_estimators': 290, 'learning_rate': 0.039130472241210495, 'max_depth': 9, 'subsample': 0.6893877485332484, 'colsample_bytree': 0.9755424239260166}. Best is trial 1 with value: 0.9268045967987361.[0m
[32m[I 2026-03-01 16:54:57,810][0m Trial 2 finished with value: 0.9273583833246493 and parameters: {'n_estimators': 78, 'learning_rate': 0.04428466943523394, 'max_depth': 10, 'subsample': 0.7158857931848475, 'colsample_bytree': 0.8553373574004605}. Best is trial 2 with value: 0.9273583833246493.[0m
[32m[I 2026-03-01 16:54:59,429][0m Trial 3 finished with value: 0.9246814801048312 and p


¡Búsqueda terminada! Entrenando el mejor modelo...

Mejores parámetros XGBoost: {'n_estimators': 133, 'learning_rate': 0.06197018767020555, 'max_depth': 10, 'subsample': 0.6068056842514755, 'colsample_bytree': 0.7998201956267917, 'random_state': 42, 'eval_metric': 'mlogloss'}
Exactitud (Accuracy) en el Test Set: 92.54%

Reporte detallado por clase de frijol:
              precision    recall  f1-score   support

    BARBUNYA       0.94      0.91      0.92       259
      BOMBAY       1.00      1.00      1.00       114
        CALI       0.93      0.96      0.95       305
    DERMASON       0.91      0.93      0.92       707
       HOROZ       0.96      0.95      0.95       376
       SEKER       0.94      0.94      0.94       414
        SIRA       0.88      0.87      0.88       534

    accuracy                           0.93      2709
   macro avg       0.94      0.94      0.94      2709
weighted avg       0.93      0.93      0.93      2709



### SMOTE

In [None]:
import optuna
import lightgbm as lgb
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, accuracy_score
import warnings
warnings.filterwarnings('ignore') # Para que no te llene la pantalla de avisos

# 1. Transformamos texto a números
le = LabelEncoder()
y_train_num = le.fit_transform(y_train)
y_test_num = le.transform(y_test)

# 2. APLICAMOS SMOTE: Creamos frijoles sintéticos para igualar las clases
print("Balanceando datos con SMOTE...")
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train_scaled, y_train_num)
print(f"Datos originales: {X_train_scaled.shape[0]} filas. Datos con SMOTE: {X_train_smote.shape[0]} filas.")

# 3. Definimos Optuna para LightGBM
def objective_lgb(trial):
    param = {
        'objective': 'multiclass',
        'num_class': 7,
        'metric': 'multi_logloss',
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.1, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'max_depth': trial.suggest_int('max_depth', 5, 20),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.5, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.5, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 50),
        'random_state': 42
    }
    
    # Modelo LightGBM
    lgb_clf = lgb.LGBMClassifier(**param)
    
    # Validamos usando los datos ENRIQUECIDOS con SMOTE
    score = cross_val_score(lgb_clf, X_train_smote, y_train_smote, n_jobs=-1, cv=3)
    return score.mean()

# 4. OPTUNA EXTREMO (100 iteraciones)
study_lgb = optuna.create_study(direction="maximize")
print("Iniciando Optuna Extremo (100 pruebas)... Ve por un café ☕")
study_lgb.optimize(objective_lgb, n_trials=100)

# 5. Entrenamos el modelo definitivo
print("\n¡Búsqueda terminada! Entrenando el Súper Modelo Final...")
best_params = study_lgb.best_params
best_params['objective'] = 'multiclass'
best_params['random_state'] = 42
best_params['verbosity'] = -1

final_lgb = lgb.LGBMClassifier(**best_params)
final_lgb.fit(X_train_smote, y_train_smote)

# 6. Examen final en el Test Set original (escalado, sin adulterar)
y_pred_num = final_lgb.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test_num, y_pred_num)

print(f"\n==========================================")
print(f"Mejores parámetros LightGBM: {best_params}")
print(f"Exactitud (Accuracy) Final: {test_accuracy * 100:.2f}%")
print(f"==========================================\n")

y_pred_nombres = le.inverse_transform(y_pred_num)
print(classification_report(y_test, y_pred_nombres))

Balanceando datos con SMOTE...


[32m[I 2026-03-01 17:01:45,879][0m A new study created in memory with name: no-name-835d0f3c-df29-4eec-ac5a-b2f726dac946[0m


Datos originales: 10834 filas. Datos con SMOTE: 19873 filas.
Iniciando Optuna Extremo (100 pruebas)... Ve por un café ☕


[32m[I 2026-03-01 17:01:57,915][0m Trial 0 finished with value: 0.9587889207911767 and parameters: {'n_estimators': 277, 'learning_rate': 0.01905593442354854, 'num_leaves': 118, 'max_depth': 8, 'feature_fraction': 0.9673491615245339, 'bagging_fraction': 0.797243478885159, 'bagging_freq': 2, 'min_child_samples': 26}. Best is trial 0 with value: 0.9587889207911767.[0m
[32m[I 2026-03-01 17:02:15,059][0m Trial 1 finished with value: 0.9620597180445416 and parameters: {'n_estimators': 787, 'learning_rate': 0.0620193810996144, 'num_leaves': 119, 'max_depth': 9, 'feature_fraction': 0.8184041568653363, 'bagging_fraction': 0.53988925965117, 'bagging_freq': 6, 'min_child_samples': 30}. Best is trial 1 with value: 0.9620597180445416.[0m
[32m[I 2026-03-01 17:02:27,881][0m Trial 2 finished with value: 0.9622609683103941 and parameters: {'n_estimators': 739, 'learning_rate': 0.09484806346387344, 'num_leaves': 64, 'max_depth': 10, 'feature_fraction': 0.9222206716935615, 'bagging_fraction': 0.

In [None]:
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

print("Preparando a los 3 titanes...")

# 1. LightGBM (Más árboles, aprendizaje más lento)
lgb_params = {
    'n_estimators': 1000,           # ANTES: 499
    'learning_rate': 0.01,          # ANTES: 0.0391
    'num_leaves': 64, 
    'max_depth': 15, 'feature_fraction': 0.99, 'bagging_fraction': 0.69, 
    'bagging_freq': 4, 'min_child_samples': 19, 'objective': 'multiclass', 
    'verbosity': -1, 'random_state': 42
}
lgb_model = lgb.LGBMClassifier(**lgb_params)

# 2. XGBoost (Más árboles, aprendizaje más lento)
xgb_params = {
    'n_estimators': 1000,           # ANTES: 133
    'learning_rate': 0.01,          # ANTES: 0.0619
    'max_depth': 10, 
    'subsample': 0.606, 'colsample_bytree': 0.799, 'eval_metric': 'mlogloss', 
    'random_state': 42
}
xgb_model = xgb.XGBClassifier(**xgb_params)

# 3. CatBoost (Entrenamiento profundo)
cat_model = CatBoostClassifier(
    iterations=2000,                # ANTES: 500
    depth=8, 
    learning_rate=0.02,             # ANTES: 0.05
    loss_function='MultiClass', verbose=0, random_state=42
)

estimadores_base = [
    ('LightGBM', lgb_model),
    ('XGBoost', xgb_model),
    ('CatBoost', cat_model)
]

jefe_final = LogisticRegression(max_iter=2000, random_state=42)

mega_ensamble = StackingClassifier(
    estimators=estimadores_base,
    final_estimator=jefe_final,
    cv=5,                           # ANTES: 3. (Puedes poner 10 si tienes mucho tiempo)
    n_jobs=-1
)

print("Entrenando el Meta-Ensamble en MODO EXTREMO (Puede tardar 10-20 minutos)...")
mega_ensamble.fit(X_train_smote, y_train_smote)

# Predicción
y_pred_num = mega_ensamble.predict(X_test_scaled)
print(f"🏆 EXACTITUD FINAL: {accuracy_score(y_test_num, y_pred_num) * 100:.2f}% 🏆")

# Volvemos a poner los nombres de los frijoles para leer el reporte
y_pred_nombres = le.inverse_transform(y_pred_num)
print("Reporte detallado por clase:")
print(classification_report(y_test, y_pred_nombres))