In [1]:
from itertools import product

import sys
import os

# Ruta absoluta a la carpeta raíz (donde está la carpeta "scripts")
root_path = os.path.abspath(os.path.join(os.getcwd(), '..'))  # Subís un nivel
sys.path.append(root_path)

param_grid = {
    'k_neighbors': [3, 5, 7],
    'radio_densidad': [0.8, 1.0, 1.2],
    'percentil_dist': [50, 75, 90]
}


In [2]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))  # Ajuste de ruta si estás dentro de notebooks/

from datasets.config_datasets import config_datasets
from datasets.cargar_dataset import cargar_dataset

for nombre, cfg in config_datasets.items():
    print(f"\n🔍 Procesando dataset: {nombre}")
    try:
        X, y_bin = cargar_dataset(
            path=cfg['path'],
            clase_minoria=cfg['clase_minoria'],
            col_features=cfg['col_features'],
            col_target=cfg['col_target'],
            sep=cfg['sep'],
            header=cfg['header']
        )
        print(f"✅ Dataset {nombre} cargado correctamente.")
        print(f"🔴 Instancias minoritarias: {sum(y_bin==1)}")
        print(f"⚪ Instancias mayoritarias: {sum(y_bin==0)}")

        # Aquí llamás a tu función PC-SMOTE con X, y_bin
        # Ejemplo:
        # X_res, y_res = aplicar_pc_smote(X, y_bin, **params)

    except Exception as e:
        print(f"❌ Error al procesar {nombre}: {e}")



🔍 Procesando dataset: ecoli
✅ Dataset ecoli cargado correctamente.
🔴 Instancias minoritarias: 2
⚪ Instancias mayoritarias: 334

🔍 Procesando dataset: wdbc
✅ Dataset wdbc cargado correctamente.
🔴 Instancias minoritarias: 212
⚪ Instancias mayoritarias: 357

🔍 Procesando dataset: glass
✅ Dataset glass cargado correctamente.
🔴 Instancias minoritarias: 9
⚪ Instancias mayoritarias: 205

🔍 Procesando dataset: heart
✅ Dataset heart cargado correctamente.
🔴 Instancias minoritarias: 54
⚪ Instancias mayoritarias: 243


In [3]:
from scripts.evaluacion import evaluar_sampler_holdout
from scripts.pc_smote import PCSMOTE
from datasets.config_datasets import config_datasets
from datasets.cargar_dataset import cargar_dataset
import pandas as pd
import numpy as np

# Grilla de hiperparámetros a evaluar
param_grid = [
    (k, radio, p_dist)
    for k in [3, 5, 7]
    for radio in [0.5, 1.0, 1.5]
    for p_dist in [50, 75, 90]
]

resultados = []

# Iterar por cada dataset definido
for nombre_dataset, config in config_datasets.items():
    print(f"\n📊 Evaluando dataset: {nombre_dataset.upper()}")

    # Cargar datos
    try:
        X, y_bin = cargar_dataset(
            path=config["path"],
            clase_minoria=config["clase_minoria"],
            col_features=config["col_features"],
            col_target=config["col_target"],
            sep=config.get("sep", ","),
            header=config.get("header", "infer")
        )
    except Exception as e:
        print(f"❌ Error al cargar dataset {nombre_dataset}: {e}")
        continue

    if len(X) == 0 or len(y_bin) == 0:
        print(f"⚠️ Dataset vacío: {nombre_dataset}. Saltando...")
        continue

    total_minorias = np.sum(y_bin == 1)
    if total_minorias < 3:
        print(f"⚠️ Muy pocas muestras minoritarias ({total_minorias}). Saltando dataset.")
        continue

    # Evaluar combinaciones de parámetros
    for k, radio, p_dist in param_grid:
        # Verificar si habrá suficientes minoritarias en el train
        n_minorias_train_estimado = int(total_minorias * 0.7)
        if n_minorias_train_estimado <= k:
            print(f"⏭️  Saltando combinación k={k} → muy pocas minoritarias en el train ({n_minorias_train_estimado})")
            continue

        print(f"\n🔍 Evaluando combinación: k={k}, radio={radio}, percentil_dist={p_dist}")

        try:
            # Instanciar sampler manualmente para poder acceder al atributo synthetic_count
            sampler = PCSMOTE(
                k_neighbors=k,
                radio_densidad=radio,
                percentil_dist=p_dist,
                random_state=42,  # o None
                verbose=False
            )

            # Hacer un fit_resample para contar muestras sintéticas generadas
            _, _ = sampler.fit_resample(X, y_bin)
            n_sinteticas = sampler.synthetic_count

            # Evaluar el sampler con esa configuración
            metricas = evaluar_sampler_holdout(
                nombre="PC-SMOTE",
                sampler_class=PCSMOTE,
                X=X,
                y_bin=y_bin,
                n_iter=5,
                k_neighbors=k,
                radio_densidad=radio,
                percentil_dist=p_dist
            )

            resultados.append({
                "dataset": nombre_dataset,
                "k": k,
                "radio": radio,
                "percentil_dist": p_dist,
                "synthetic_count": n_sinteticas,  
                **metricas
            })
        except Exception as e:
            print(f"❌ Error durante evaluación de k={k}, radio={radio}, dist={p_dist}: {e}")
            continue

# Consolidar resultados
df_resultados = pd.DataFrame(resultados)
df_resultados = df_resultados.sort_values(by=["dataset", "mean_f1"], ascending=[True, False])
df_resultados.reset_index(drop=True, inplace=True)

df_resultados.to_csv("../resultados/resultado13072025.txt", sep="\t", index=False)
# print("\n✅ Columnas disponibles:", df_resultados.columns.tolist())
# df_resultados.head()



📊 Evaluando dataset: ECOLI
⚠️ Muy pocas muestras minoritarias (2). Saltando dataset.

📊 Evaluando dataset: WDBC

🔍 Evaluando combinación: k=3, radio=0.5, percentil_dist=50
📌 Total muestras minoritarias: 148
📌 Total muestras mayoritarias: 250
📊 Riesgo - media: 0.1374 | min: 0.0000 | max: 1.0000
📊 Densidad - media: 0.0000 | p25: 0.0000 | p50: 0.0000 | p75: 0.0000
📈 Ejemplo de primeros 10 valores de riesgo: [0.    0.    0.667 0.    0.    0.    0.    0.    1.    0.   ]
📈 Ejemplo de primeros 10 valores de densidad: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
🔎 Muestras con riesgo ∈ [0.4, 0.6]: 0 de 148
🔎 Muestras con densidad > 0: 0 de 148
✅ Muestras seleccionadas para sobremuestreo (intersección): 0
❌ Muestras descartadas (ruido o baja densidad): 148
⚠️ No se encontraron muestras válidas para sobremuestreo. Devolviendo conjunto original.
📌 Total muestras minoritarias: 148
📌 Total muestras mayoritarias: 250
📊 Riesgo - media: 0.1306 | min: 0.0000 | max: 1.0000
📊 Densidad - media: 0.0000 | p25: 0.0000 |

## OBJETIVO DEL BLOQUE
Comparar clasificadores (XGBoost, RandomForest, etc.) usando tu técnica PC-SMOTE sobre todos los datasets definidos en config_datasets. Para cada combinación dataset + clasificador, se evaluará el rendimiento y se almacenarán las métricas clave.

In [None]:
from scripts.pc_smote import PCSMOTE
from scripts.evaluacion import evaluar_sampler_holdout
from datasets.cargar_dataset import cargar_dataset
from datasets.config_datasets import config_datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
import pandas as pd
import numpy as np
from imblearn.over_sampling import SMOTE, BorderlineSMOTE, ADASYN


# Parámetros fijos de PC-SMOTE
pcsmote_params = {
    "k_neighbors": 3,
    "radio_densidad": 0.5,
    "percentil_dist": 75
}

# Clasificadores a evaluar
clasificadores = {
    "RandomForest": RandomForestClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42),
    "SVM": SVC(probability=True, random_state=42),
    "LogisticRegression": LogisticRegression(max_iter=1000, random_state=42),
    "MLP": MLPClassifier(max_iter=1000, random_state=42),
    "kNN": KNeighborsClassifier()
}

resultados = []

# Iterar por datasets
for nombre_dataset, config in config_datasets.items():
    print(f"\n📂 Evaluando dataset: {nombre_dataset.upper()}")

    # Cargar datos
    try:
        X, y_bin = cargar_dataset(
            path=config["path"],
            clase_minoria=config["clase_minoria"],
            col_features=config["col_features"],
            col_target=config["col_target"],
            sep=config.get("sep", ","),
            header=config.get("header", "infer")
        )
    except Exception as e:
        print(f"❌ Error al cargar {nombre_dataset}: {e}")
        continue

    if np.sum(y_bin == 1) < 3:
        print(f"⚠️ Pocas muestras minoritarias en {nombre_dataset}. Saltando...")
        continue

    # Iterar por clasificadores
    for nombre_clasificador, clf in clasificadores.items():
        print(f"\n🔍 Clasificador: {nombre_clasificador}")

        try:
            metricas = evaluar_sampler_holdout(
                nombre=nombre_clasificador,
                sampler_class=PCSMOTE,
                X=X,
                y_bin=y_bin,
                n_iter=5,
                classifier=clf,
                **pcsmote_params
            )

            resultados.append({
                "dataset": nombre_dataset,
                "clasificador": nombre_clasificador,
                **pcsmote_params,
                **metricas
            })

        except Exception as e:
            print(f"❌ Error en {nombre_clasificador} con {nombre_dataset}: {e}")
            continue

# Consolidar resultados
df_resultados = pd.DataFrame(resultados)
df_resultados = df_resultados.sort_values(by=["dataset", "mean_f1"], ascending=[True, False])
df_resultados.reset_index(drop=True, inplace=True)

# Mostrar resumen
print("\n✅ Clasificadores evaluados sobre múltiples datasets:")
print(df_resultados[["dataset", "clasificador", "mean_f1", "std_f1", "mean_bal_acc"]].head())



📂 Evaluando dataset: ECOLI
⚠️ Pocas muestras minoritarias en ecoli. Saltando...

📂 Evaluando dataset: WDBC

🔍 Clasificador: RandomForest
❌ Error en RandomForest con wdbc: PCSMOTE.__init__() got an unexpected keyword argument 'classifier'

🔍 Clasificador: XGBoost
❌ Error en XGBoost con wdbc: PCSMOTE.__init__() got an unexpected keyword argument 'classifier'

🔍 Clasificador: SVM
❌ Error en SVM con wdbc: PCSMOTE.__init__() got an unexpected keyword argument 'classifier'

🔍 Clasificador: LogisticRegression
❌ Error en LogisticRegression con wdbc: PCSMOTE.__init__() got an unexpected keyword argument 'classifier'

🔍 Clasificador: MLP
❌ Error en MLP con wdbc: PCSMOTE.__init__() got an unexpected keyword argument 'classifier'

🔍 Clasificador: kNN
❌ Error en kNN con wdbc: PCSMOTE.__init__() got an unexpected keyword argument 'classifier'

📂 Evaluando dataset: GLASS

🔍 Clasificador: RandomForest
❌ Error en RandomForest con glass: PCSMOTE.__init__() got an unexpected keyword argument 'classifier

KeyError: 'dataset'

In [5]:
from scripts.pc_smote import PCSMOTE
from scripts.evaluacion import evaluar_sampler_holdout
from datasets.config_datasets import config_datasets
from datasets.cargar_dataset import cargar_dataset

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

from imblearn.over_sampling import SMOTE, BorderlineSMOTE, ADASYN

import pandas as pd
import os

# Crear carpeta de resultados si no existe
os.makedirs("../resultados", exist_ok=True)

# Parámetros PC-SMOTE
pcsmote_params = {
    "k_neighbors": 3,
    "radio_densidad": 0.5,
    "percentil_dist": 75
}

# Clasificadores a evaluar
clasificadores = {
    "RandomForest": RandomForestClassifier,
    "XGBoost": XGBClassifier,
    "SVM": SVC,
    "LogisticRegression": LogisticRegression,
    "MLP": MLPClassifier
}

# Técnicas de sobremuestreo
samplers = {
    "SMOTE": SMOTE,
    "BorderlineSMOTE": BorderlineSMOTE,
    "ADASYN": ADASYN,
    "PC-SMOTE": PCSMOTE
}

# Evaluar combinaciones
resultados = []

for nombre_dataset, cfg in config_datasets.items():
    print(f"\n📁 Evaluando dataset: {nombre_dataset}")
    try:
        X, y = cargar_dataset(
            path=cfg['path'],
            clase_minoria=cfg['clase_minoria'],
            col_features=cfg['col_features'],
            col_target=cfg['col_target'],
            sep=cfg['sep'],
            header=cfg['header']
        )

        for sampler_nombre, sampler_class in samplers.items():
            sampler_kwargs = pcsmote_params if sampler_nombre == "PC-SMOTE" else {}
            for clf_nombre, clf_class in clasificadores.items():
                print(f"🔍 {sampler_nombre} + {clf_nombre}")
                try:
                    resultado = evaluar_sampler_holdout(
                        nombre=sampler_nombre,
                        sampler_class=sampler_class,
                        X=X,
                        y_bin=y,
                        modelo=clf_class,
                        **sampler_kwargs
                    )
                    resultado["dataset"] = nombre_dataset
                    resultado["clasificador"] = clf_nombre
                    resultados.append(resultado)
                except Exception as e:
                    print(f"⚠️ Error al evaluar {sampler_nombre} + {clf_nombre}: {e}")

    except Exception as e:
        print(f"❌ Error al procesar dataset {nombre_dataset}: {e}")

# Consolidar resultados
df_resultados = pd.DataFrame(resultados)

if not df_resultados.empty:
    df_resultados = df_resultados.sort_values(by=["dataset", "mean_f1"], ascending=[True, False])
    df_resultados.reset_index(drop=True, inplace=True)

    # Mostrar en notebook
    display(df_resultados)

    # Guardar a archivo TXT
    ruta_resultado = "../resultados/resultado13072025.txt"
    df_resultados.to_csv(ruta_resultado, sep="\t", index=False)
    print(f"✅ Resultados guardados en: {ruta_resultado}")
else:
    print("⚠️ No se obtuvieron resultados válidos.")



📁 Evaluando dataset: ecoli
🔍 SMOTE + RandomForest
⚠️ Error al evaluar SMOTE + RandomForest: Expected n_neighbors <= n_samples_fit, but n_neighbors = 6, n_samples_fit = 1, n_samples = 1
🔍 SMOTE + XGBoost
⚠️ Error al evaluar SMOTE + XGBoost: Expected n_neighbors <= n_samples_fit, but n_neighbors = 6, n_samples_fit = 1, n_samples = 1
🔍 SMOTE + SVM
⚠️ Error al evaluar SMOTE + SVM: Expected n_neighbors <= n_samples_fit, but n_neighbors = 6, n_samples_fit = 1, n_samples = 1
🔍 SMOTE + LogisticRegression
⚠️ Error al evaluar SMOTE + LogisticRegression: Expected n_neighbors <= n_samples_fit, but n_neighbors = 6, n_samples_fit = 1, n_samples = 1
🔍 SMOTE + MLP
⚠️ Error al evaluar SMOTE + MLP: Expected n_neighbors <= n_samples_fit, but n_neighbors = 6, n_samples_fit = 1, n_samples = 1
🔍 BorderlineSMOTE + RandomForest
🔍 BorderlineSMOTE + XGBoost
🔍 BorderlineSMOTE + SVM
⚠️ Error al evaluar BorderlineSMOTE + SVM: This 'SVC' has no attribute 'predict_proba'
🔍 BorderlineSMOTE + LogisticRegression
🔍 Bor

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

🔍 BorderlineSMOTE + RandomForest
🔍 BorderlineSMOTE + XGBoost
🔍 BorderlineSMOTE + SVM
⚠️ Error al evaluar BorderlineSMOTE + SVM: This 'SVC' has no attribute 'predict_proba'
🔍 BorderlineSMOTE + LogisticRegression
🔍 BorderlineSMOTE + MLP


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

🔍 ADASYN + RandomForest
🔍 ADASYN + XGBoost
🔍 ADASYN + SVM
⚠️ Error al evaluar ADASYN + SVM: This 'SVC' has no attribute 'predict_proba'
🔍 ADASYN + LogisticRegression
🔍 ADASYN + MLP


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

🔍 PC-SMOTE + RandomForest
📌 Total muestras minoritarias: 148
📌 Total muestras mayoritarias: 250
📊 Riesgo - media: 0.1374 | min: 0.0000 | max: 1.0000
📊 Densidad - media: 0.0000 | p25: 0.0000 | p50: 0.0000 | p75: 0.0000
📈 Ejemplo de primeros 10 valores de riesgo: [0.    0.    0.667 0.    0.    0.    0.    0.    1.    0.   ]
📈 Ejemplo de primeros 10 valores de densidad: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
🔎 Muestras con riesgo ∈ [0.4, 0.6]: 0 de 148
🔎 Muestras con densidad > 0: 0 de 148
✅ Muestras seleccionadas para sobremuestreo (intersección): 0
❌ Muestras descartadas (ruido o baja densidad): 148
⚠️ No se encontraron muestras válidas para sobremuestreo. Devolviendo conjunto original.
📌 Total muestras minoritarias: 148
📌 Total muestras mayoritarias: 250
📊 Riesgo - media: 0.1306 | min: 0.0000 | max: 1.0000
📊 Densidad - media: 0.0000 | p25: 0.0000 | p50: 0.0000 | p75: 0.0000
📈 Ejemplo de primeros 10 valores de riesgo: [0.667 0.    0.667 0.    0.    0.    0.    0.    0.    0.333]
📈 Ejemplo de p

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

📌 Total muestras minoritarias: 148
📌 Total muestras mayoritarias: 250
📊 Riesgo - media: 0.1149 | min: 0.0000 | max: 1.0000
📊 Densidad - media: 0.0000 | p25: 0.0000 | p50: 0.0000 | p75: 0.0000
📈 Ejemplo de primeros 10 valores de riesgo: [0.333 0.    0.    0.    0.333 0.    0.    0.333 0.    0.   ]
📈 Ejemplo de primeros 10 valores de densidad: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
🔎 Muestras con riesgo ∈ [0.4, 0.6]: 0 de 148
🔎 Muestras con densidad > 0: 0 de 148
✅ Muestras seleccionadas para sobremuestreo (intersección): 0
❌ Muestras descartadas (ruido o baja densidad): 148
⚠️ No se encontraron muestras válidas para sobremuestreo. Devolviendo conjunto original.
🔍 PC-SMOTE + MLP
📌 Total muestras minoritarias: 148
📌 Total muestras mayoritarias: 250
📊 Riesgo - media: 0.1374 | min: 0.0000 | max: 1.0000
📊 Densidad - media: 0.0000 | p25: 0.0000 | p50: 0.0000 | p75: 0.0000
📈 Ejemplo de primeros 10 valores de riesgo: [0.    0.    0.667 0.    0.    0.    0.    0.    1.    0.   ]
📈 Ejemplo de primeros 1



📌 Total muestras minoritarias: 148
📌 Total muestras mayoritarias: 250
📊 Riesgo - media: 0.1306 | min: 0.0000 | max: 1.0000
📊 Densidad - media: 0.0000 | p25: 0.0000 | p50: 0.0000 | p75: 0.0000
📈 Ejemplo de primeros 10 valores de riesgo: [0.667 0.    0.667 0.    0.    0.    0.    0.    0.    0.333]
📈 Ejemplo de primeros 10 valores de densidad: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
🔎 Muestras con riesgo ∈ [0.4, 0.6]: 0 de 148
🔎 Muestras con densidad > 0: 0 de 148
✅ Muestras seleccionadas para sobremuestreo (intersección): 0
❌ Muestras descartadas (ruido o baja densidad): 148
⚠️ No se encontraron muestras válidas para sobremuestreo. Devolviendo conjunto original.




📌 Total muestras minoritarias: 148
📌 Total muestras mayoritarias: 250
📊 Riesgo - media: 0.1396 | min: 0.0000 | max: 1.0000
📊 Densidad - media: 0.0000 | p25: 0.0000 | p50: 0.0000 | p75: 0.0000
📈 Ejemplo de primeros 10 valores de riesgo: [0.    0.    0.    0.333 0.    0.    0.333 0.    0.333 0.   ]
📈 Ejemplo de primeros 10 valores de densidad: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
🔎 Muestras con riesgo ∈ [0.4, 0.6]: 0 de 148
🔎 Muestras con densidad > 0: 0 de 148
✅ Muestras seleccionadas para sobremuestreo (intersección): 0
❌ Muestras descartadas (ruido o baja densidad): 148
⚠️ No se encontraron muestras válidas para sobremuestreo. Devolviendo conjunto original.




📌 Total muestras minoritarias: 148
📌 Total muestras mayoritarias: 250
📊 Riesgo - media: 0.1284 | min: 0.0000 | max: 1.0000
📊 Densidad - media: 0.0000 | p25: 0.0000 | p50: 0.0000 | p75: 0.0000
📈 Ejemplo de primeros 10 valores de riesgo: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
📈 Ejemplo de primeros 10 valores de densidad: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
🔎 Muestras con riesgo ∈ [0.4, 0.6]: 0 de 148
🔎 Muestras con densidad > 0: 0 de 148
✅ Muestras seleccionadas para sobremuestreo (intersección): 0
❌ Muestras descartadas (ruido o baja densidad): 148
⚠️ No se encontraron muestras válidas para sobremuestreo. Devolviendo conjunto original.




📌 Total muestras minoritarias: 148
📌 Total muestras mayoritarias: 250
📊 Riesgo - media: 0.1149 | min: 0.0000 | max: 1.0000
📊 Densidad - media: 0.0000 | p25: 0.0000 | p50: 0.0000 | p75: 0.0000
📈 Ejemplo de primeros 10 valores de riesgo: [0.333 0.    0.    0.    0.333 0.    0.    0.333 0.    0.   ]
📈 Ejemplo de primeros 10 valores de densidad: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
🔎 Muestras con riesgo ∈ [0.4, 0.6]: 0 de 148
🔎 Muestras con densidad > 0: 0 de 148
✅ Muestras seleccionadas para sobremuestreo (intersección): 0
❌ Muestras descartadas (ruido o baja densidad): 148
⚠️ No se encontraron muestras válidas para sobremuestreo. Devolviendo conjunto original.

📁 Evaluando dataset: glass
🔍 SMOTE + RandomForest
🔍 SMOTE + XGBoost
🔍 SMOTE + SVM
⚠️ Error al evaluar SMOTE + SVM: This 'SVC' has no attribute 'predict_proba'
🔍 SMOTE + LogisticRegression
🔍 SMOTE + MLP


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

🔍 BorderlineSMOTE + RandomForest
🔍 BorderlineSMOTE + XGBoost
🔍 BorderlineSMOTE + SVM
⚠️ Error al evaluar BorderlineSMOTE + SVM: This 'SVC' has no attribute 'predict_proba'
🔍 BorderlineSMOTE + LogisticRegression


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

🔍 BorderlineSMOTE + MLP




🔍 ADASYN + RandomForest
🔍 ADASYN + XGBoost
🔍 ADASYN + SVM
⚠️ Error al evaluar ADASYN + SVM: This 'SVC' has no attribute 'predict_proba'
🔍 ADASYN + LogisticRegression


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

🔍 ADASYN + MLP




🔍 PC-SMOTE + RandomForest
📌 Total muestras minoritarias: 6
📌 Total muestras mayoritarias: 143
📊 Riesgo - media: 0.7778 | min: 0.6667 | max: 1.0000
📊 Densidad - media: 0.1111 | p25: 0.0000 | p50: 0.0000 | p75: 0.2500
📈 Ejemplo de primeros 10 valores de riesgo: [0.667 0.667 1.    0.667 0.667 1.   ]
📈 Ejemplo de primeros 10 valores de densidad: [0.333 0.    0.    0.333 0.    0.   ]
🔎 Muestras con riesgo ∈ [0.4, 0.6]: 0 de 6
🔎 Muestras con densidad > 0: 2 de 6
✅ Muestras seleccionadas para sobremuestreo (intersección): 0
❌ Muestras descartadas (ruido o baja densidad): 6
⚠️ No se encontraron muestras válidas para sobremuestreo. Devolviendo conjunto original.
📌 Total muestras minoritarias: 6
📌 Total muestras mayoritarias: 143
📊 Riesgo - media: 0.5556 | min: 0.3333 | max: 1.0000
📊 Densidad - media: 0.2222 | p25: 0.0000 | p50: 0.1667 | p75: 0.3333
📈 Ejemplo de primeros 10 valores de riesgo: [0.333 0.333 0.333 0.333 1.    1.   ]
📈 Ejemplo de primeros 10 valores de densidad: [0.333 0.667 0.    0

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

📊 Densidad - media: 0.1111 | p25: 0.0000 | p50: 0.0000 | p75: 0.2500
📈 Ejemplo de primeros 10 valores de riesgo: [0.333 0.667 1.    0.667 1.    0.667]
📈 Ejemplo de primeros 10 valores de densidad: [0.    0.333 0.    0.    0.    0.333]
🔎 Muestras con riesgo ∈ [0.4, 0.6]: 0 de 6
🔎 Muestras con densidad > 0: 2 de 6
✅ Muestras seleccionadas para sobremuestreo (intersección): 0
❌ Muestras descartadas (ruido o baja densidad): 6
⚠️ No se encontraron muestras válidas para sobremuestreo. Devolviendo conjunto original.
🔍 PC-SMOTE + MLP
📌 Total muestras minoritarias: 6
📌 Total muestras mayoritarias: 143
📊 Riesgo - media: 0.7778 | min: 0.6667 | max: 1.0000
📊 Densidad - media: 0.1111 | p25: 0.0000 | p50: 0.0000 | p75: 0.2500
📈 Ejemplo de primeros 10 valores de riesgo: [0.667 0.667 1.    0.667 0.667 1.   ]
📈 Ejemplo de primeros 10 valores de densidad: [0.333 0.    0.    0.333 0.    0.   ]
🔎 Muestras con riesgo ∈ [0.4, 0.6]: 0 de 6
🔎 Muestras con densidad > 0: 2 de 6
✅ Muestras seleccionadas para sob



🔍 SMOTE + XGBoost
🔍 SMOTE + SVM
⚠️ Error al evaluar SMOTE + SVM: This 'SVC' has no attribute 'predict_proba'
🔍 SMOTE + LogisticRegression
🔍 SMOTE + MLP


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

🔍 BorderlineSMOTE + RandomForest
🔍 BorderlineSMOTE + XGBoost
🔍 BorderlineSMOTE + SVM
⚠️ Error al evaluar BorderlineSMOTE + SVM: This 'SVC' has no attribute 'predict_proba'
🔍 BorderlineSMOTE + LogisticRegression
🔍 BorderlineSMOTE + MLP


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

🔍 ADASYN + RandomForest
🔍 ADASYN + XGBoost
🔍 ADASYN + SVM
⚠️ Error al evaluar ADASYN + SVM: This 'SVC' has no attribute 'predict_proba'
🔍 ADASYN + LogisticRegression
🔍 ADASYN + MLP


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

🔍 PC-SMOTE + RandomForest
📌 Total muestras minoritarias: 38
📌 Total muestras mayoritarias: 169
📊 Riesgo - media: 0.8596 | min: 0.6667 | max: 1.0000
📊 Densidad - media: 0.0000 | p25: 0.0000 | p50: 0.0000 | p75: 0.0000
📈 Ejemplo de primeros 10 valores de riesgo: [1.    1.    1.    0.667 0.667 0.667 1.    1.    0.667 0.667]
📈 Ejemplo de primeros 10 valores de densidad: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
🔎 Muestras con riesgo ∈ [0.4, 0.6]: 0 de 38
🔎 Muestras con densidad > 0: 0 de 38
✅ Muestras seleccionadas para sobremuestreo (intersección): 0
❌ Muestras descartadas (ruido o baja densidad): 38
⚠️ No se encontraron muestras válidas para sobremuestreo. Devolviendo conjunto original.
📌 Total muestras minoritarias: 38
📌 Total muestras mayoritarias: 169
📊 Riesgo - media: 0.9035 | min: 0.3333 | max: 1.0000
📊 Densidad - media: 0.0000 | p25: 0.0000 | p50: 0.0000 | p75: 0.0000
📈 Ejemplo de primeros 10 valores de riesgo: [1.    0.667 1.    1.    1.    1.    0.667 0.333 0.667 1.   ]
📈 Ejemplo de primer

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

📊 Densidad - media: 0.0000 | p25: 0.0000 | p50: 0.0000 | p75: 0.0000
📈 Ejemplo de primeros 10 valores de riesgo: [0.667 1.    1.    1.    0.667 1.    0.667 0.667 0.667 0.667]
📈 Ejemplo de primeros 10 valores de densidad: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
🔎 Muestras con riesgo ∈ [0.4, 0.6]: 0 de 38
🔎 Muestras con densidad > 0: 0 de 38
✅ Muestras seleccionadas para sobremuestreo (intersección): 0
❌ Muestras descartadas (ruido o baja densidad): 38
⚠️ No se encontraron muestras válidas para sobremuestreo. Devolviendo conjunto original.
📌 Total muestras minoritarias: 38
📌 Total muestras mayoritarias: 169
📊 Riesgo - media: 0.8421 | min: 0.3333 | max: 1.0000
📊 Densidad - media: 0.0000 | p25: 0.0000 | p50: 0.0000 | p75: 0.0000
📈 Ejemplo de primeros 10 valores de riesgo: [0.667 0.667 1.    0.667 1.    1.    1.    1.    0.333 0.667]
📈 Ejemplo de primeros 10 valores de densidad: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
🔎 Muestras con riesgo ∈ [0.4, 0.6]: 0 de 38
🔎 Muestras con densidad > 0: 0 de 38
✅ Muestr

Unnamed: 0,técnica,mean_precision,mean_recall,mean_f1,std_f1,mean_roc_auc,mean_bal_acc,dataset,clasificador
0,BorderlineSMOTE,0.0,0.0,0.0,0.0,0.653,0.5,ecoli,RandomForest
1,BorderlineSMOTE,0.0,0.0,0.0,0.0,0.5,0.5,ecoli,XGBoost
2,BorderlineSMOTE,0.0,0.0,0.0,0.0,0.964,0.5,ecoli,LogisticRegression
3,BorderlineSMOTE,0.0,0.0,0.0,0.0,0.006,0.5,ecoli,MLP
4,SMOTE,0.75,0.733333,0.731429,0.3741,1.0,0.865054,glass,RandomForest
5,BorderlineSMOTE,0.75,0.733333,0.731429,0.3741,1.0,0.865054,glass,RandomForest
6,ADASYN,0.75,0.666667,0.671429,0.38226,1.0,0.83172,glass,RandomForest
7,SMOTE,0.52,0.733333,0.607143,0.321032,0.989247,0.856989,glass,XGBoost
8,ADASYN,0.536667,0.666667,0.588095,0.368517,0.988172,0.825269,glass,XGBoost
9,BorderlineSMOTE,0.486667,0.666667,0.559524,0.339901,0.988172,0.823656,glass,XGBoost


✅ Resultados guardados en: ../resultados/resultado13072025.txt
