# Evaluación de PC-SMOTE con Grid Search en el dataset Shuttle (Generación de caso base y datasets aumentados)


In [1]:
import sys
sys.path.append("../scripts")
sys.path.append("../datasets")

import os

# Dataset and result locations (relative paths, resolved against the working directory)
RUTA_DATASETS_BASE = "../datasets/datasets_aumentados/base/"
RUTA_DATASETS_AUMENTADOS = "../datasets/datasets_aumentados/"
RUTA_DATASETS_CLASICOS = "../datasets/datasets_aumentados/resampler_clasicos/"
DIRECTORIO_SALIDA = "../resultados"

# Create the output and classic-resampler folders up front; exist_ok leaves existing ones untouched
os.makedirs(DIRECTORIO_SALIDA, exist_ok=True)
os.makedirs(RUTA_DATASETS_CLASICOS, exist_ok=True)


In [2]:
import gc, time  # gc: liberaci√≥n expl√≠cita de memoria entre ejecuciones; time: medici√≥n de duraci√≥n de b√∫squedas
from dataclasses import dataclass, asdict  # dataclass: estructura limpia para registrar resultados y metadatos de cada combinaci√≥n
import json  # guardar resultados intermedios en formato JSON

import numpy as np  # operaciones num√©ricas y manipulaci√≥n de vectores/matrices
import pandas as pd  # manejo de estructuras tabulares (dataframes) para consolidar resultados


# Utilizamos validación estratificada + búsqueda aleatoria de hiperparámetros
from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV

# Métricas utilizadas en CV y test (todas macro para evitar sesgos por clase mayoritaria)
from sklearn.metrics import (
    f1_score,
    balanced_accuracy_score,
    recall_score,
    make_scorer
)

# Cada modelo se ejecuta dentro de un Pipeline para permitir transformaciones futuras
from sklearn.pipeline import Pipeline

# Modelo principal evaluado (Random Forest)
from sklearn.ensemble import RandomForestClassifier

# Suprimir warnings de convergencia innecesarios (SVM no se usa en esta fase)
from sklearn.exceptions import ConvergenceWarning
import warnings
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Controlar comportamiento en entornos con múltiples núcleos
# (evita paralelismo interno conflictivo con n_jobs de sklearn)
import os

# Fixed random seed so repeated runs are reproducible
RANDOM_STATE = 42

# On augmented Shuttle, SVM is skipped because of ROC-AUC instability and compute limits
OMITIR_SVM_EN_SHUTTLE_AUMENTADO = True

# Consolidated Excel file with CV and Test results for all techniques
NOMBRE_ARCHIVO_EXCEL = os.path.join(DIRECTORIO_SALIDA, "resultados_RS_cv_vs_test.xlsx")

In [3]:
from pathlib import Path
import re

# =========================
# Estructuras de datos
# =========================
@dataclass
class DatasetCombination:
    """One train/test CSV pairing plus the metadata parsed from the train file name.

    Instances are built by ``enumerar_combinaciones_base_y_aumentadas``. Fields
    that a given kind of combination does not set keep their default
    (the ``"--"`` placeholder, ``None`` or ``0``).
    """
    dataset_logico: str
    tipo_combination: str      # "base" | "clasico" | "pcsmote"
    ruta_train_csv: str
    ruta_test_csv: str

    tecnica_aumento: str = "base"

    # Generic parameters (classic / base)
    valor_densidad: str | int | None = "--"
    valor_riesgo: str | int | None = "--"
    criterio_pureza: str | None = "--"

    # PC-SMOTE parameters (when applicable)
    percentil_radio_distancia: str | int | None = "--"
    percentil_riesgo: str | int | None = "--"
    umbral_densidad: str | None = "--"
    umbral_riesgo: str | None = "--"

    tipo_pureza: str = "--"           # PE.. or Upp.. token taken from the file name
    nombre_configuracion: str = ""    # PRD.._PR.._CP.._UD.._UR.._.._I.._SV.._SG..

    grado_limpieza: str | int = "--"  # I0, I1, I3, etc.
    total_muestras_train: int | None = None
    tamanio_dataset: int | None = None  # total train+test size (when known)
    sinteticos_generados: int = 0
    semillas_validas: int = 0


@dataclass
class RegistroRendimiento:
    """Flat result row for one (model, dataset combination) evaluation.

    The evaluation loop converts each instance with ``asdict`` and appends it
    to ``registros``, which later becomes a pandas DataFrame.
    """
    dataset_logico: str
    tipo_combination: str
    nombre_modelo_aprendizaje: str
    nombre_configuracion: str
    tecnica_aumento: str
    valor_densidad: str
    valor_riesgo: str
    criterio_pureza: str
    grado_limpieza: str

    cantidad_train: int
    cantidad_test: int
    cantidad_caracteristicas: int

    # Cross-validation metrics
    cv_f1_macro: float
    cv_balanced_accuracy: float
    cv_recall_macro: float

    # Test metrics
    test_f1_macro: float
    test_balanced_accuracy: float
    test_recall_macro: float

    mejores_hiperparametros: str
    tiempo_busqueda_seg: float




def _buscar_test_base(dataset_logico, archivos_base):
    """Find the base test CSV that belongs to *dataset_logico*.

    Test files are named ``{dataset}_tdataset{N}_tm{n}_test.csv``. When several
    files match, the one with the largest ``tm`` wins (the first one seen is
    kept on ties).

    Returns:
        ``(nombre_test, tamanio_dataset)`` where ``tamanio_dataset`` is the
        ``tdataset`` number of the *selected* file, or ``(None, None)`` when no
        file matches.
    """
    patron_test = re.compile(
        rf"^{re.escape(dataset_logico)}_tdataset(\d+)_tm(\d+)_test\.csv$"
    )
    nombre_test = None
    tamanio_dataset = None
    n_test_detectado = None

    for nombre_candidato in archivos_base:
        m_test = patron_test.match(nombre_candidato)
        if not m_test:
            continue
        n_tm = int(m_test.group(2))
        if n_test_detectado is None or n_tm > n_test_detectado:
            n_test_detectado = n_tm
            nombre_test = nombre_candidato
            # Taken from the winning file only, so it always agrees with nombre_test
            tamanio_dataset = int(m_test.group(1))

    return nombre_test, tamanio_dataset


def enumerar_combinaciones_base_y_aumentadas(
    ruta_base,
    ruta_clasicos,
    ruta_aumentados,
    verbose=True
):
    """Discover every train/test CSV pairing for base, classic and PC-SMOTE data.

    Three folders are scanned; every ``*_train.csv`` whose name matches the
    expected pattern is paired with the base test file of its dataset:

    * base     -> ``{dataset}_I{I}_tm{n}_train.csv``
    * classic  -> ``{tecnica}_{dataset}_I{I}_sg{sg}_train.csv``
    * PC-SMOTE -> ``pcs_{dataset}_PRD{prd}_PR{pr}_CP{ent|prop}_UD{ddd}_``
      ``{PE..|Upp...}_UR{ddd}_I{iso}_SV{sv}_SG{sg}_train.csv``

    Args:
        ruta_base: folder holding base train files and all test files.
        ruta_clasicos: folder holding train files of the classic resamplers.
        ruta_aumentados: folder holding PC-SMOTE train files.
        verbose: when true, print progress and the reason for every skipped file;
            when false the function prints nothing.

    Returns:
        list[DatasetCombination], base entries first, then classic, then
        PC-SMOTE. Directory listings are sorted so the order is reproducible.

    Raises:
        OSError: propagated from ``os.listdir`` if a folder cannot be read.
    """
    combinaciones = []
    cont_combinaciones = 0

    # (dataset_logico, grado_limpieza) -> base train size, reused by the classic section
    tamanio_train_base_por_dataset_y_I = {}

    # ==========================================================
    # 1) BASE
    #    train: {dataset}_I{I}_tm{n}_train.csv
    #    test : {dataset}_tdataset{N}_tm{n}_test.csv
    # ==========================================================
    if verbose:
        print(f"üìÇ Explorando carpeta base: {ruta_base}")

    # os.listdir gives no ordering guarantee; sort for a reproducible enumeration
    archivos_base = sorted(os.listdir(ruta_base))
    patron_base = re.compile(r"(.+?)_I(\d+)_tm(\d+)_train\.csv$")

    for nombre in archivos_base:
        if not nombre.endswith("_train.csv"):
            if verbose:
                print(f"  ‚ö™ Omitido (no es *_train.csv): {nombre}")
            continue

        m = patron_base.match(nombre)
        if not m:
            if verbose:
                print(f"  ‚ö™ No coincide patr√≥n base con I*_tm*_train: {nombre}")
            continue

        dataset_logico = m.group(1)
        grado_limpieza = int(m.group(2))
        total_muestras_train = int(m.group(3))

        # Recorded even if the test file turns out to be missing, as before
        clave_base = (dataset_logico, grado_limpieza)
        tamanio_train_base_por_dataset_y_I[clave_base] = total_muestras_train

        ruta_train_csv = os.path.join(ruta_base, nombre)

        nombre_test, tamanio_dataset = _buscar_test_base(dataset_logico, archivos_base)
        if nombre_test is None:
            if verbose:
                print(f"  ‚ö†Ô∏è  Falta test para dataset base '{dataset_logico}', se omite {nombre}")
            continue

        ruta_test_csv = os.path.join(ruta_base, nombre_test)

        cont_combinaciones += 1
        if verbose:
            print(f"#{cont_combinaciones}  ‚úÖ Agregado base: {nombre} combinado con {nombre_test}")

        combinaciones.append(DatasetCombination(
            dataset_logico=dataset_logico,
            tipo_combination="base",
            ruta_train_csv=ruta_train_csv,
            ruta_test_csv=ruta_test_csv,
            tecnica_aumento="base",
            valor_densidad=None,
            valor_riesgo=None,
            criterio_pureza=None,
            grado_limpieza=grado_limpieza,
            total_muestras_train=total_muestras_train,
            tamanio_dataset=tamanio_dataset
        ))

    # ==========================================================
    # 2) CLASSIC RESAMPLERS
    #    {tecnica}_{dataset}_I{I}_sg{sg}_train.csv
    #    test: the base test file of the same dataset
    # ==========================================================
    if verbose:
        print(f"üìÇ Explorando carpeta cl√°sicos: {ruta_clasicos}")

    archivos_clasicos = sorted(os.listdir(ruta_clasicos))

    # e.g. adasyn_us_crime_I1_sg120_train.csv -> tecnica="adasyn", dataset="us_crime".
    # The first group is lazy, so the technique is everything before the FIRST underscore.
    patron_clasico = re.compile(r"(.+?)_(.+?)_I(\d+)_sg(\d+)_train\.csv$")

    for nombre in archivos_clasicos:
        if not nombre.endswith("_train.csv"):
            continue

        m = patron_clasico.match(nombre)
        if not m:
            if verbose:
                print(f"  ‚ö†Ô∏è  No cumple patr√≥n cl√°sicos: {nombre}")
            continue

        tecnica = m.group(1)
        dataset_logico = m.group(2)
        grado_limpieza = int(m.group(3))
        sinteticos_generados = int(m.group(4))

        # Base train size for the same dataset and the same cleaning degree
        clave_base = (dataset_logico, grado_limpieza)
        total_muestras_train = tamanio_train_base_por_dataset_y_I.get(clave_base)

        if total_muestras_train is None:
            if verbose:
                print(
                    f"  ‚ö†Ô∏è  No se encontr√≥ tama√±o de train base para "
                    f"(dataset='{dataset_logico}', I={grado_limpieza}). Se omite {nombre}"
                )
            continue

        ruta_train_csv = os.path.join(ruta_clasicos, nombre)

        nombre_test, tamanio_dataset = _buscar_test_base(dataset_logico, archivos_base)
        if nombre_test is None:
            if verbose:
                print(f"  ‚ö†Ô∏è  No hay test base para dataset '{dataset_logico}', se omite {nombre}")
            continue

        ruta_test_csv = os.path.join(ruta_base, nombre_test)

        cont_combinaciones += 1
        if verbose:
            print(f"#{cont_combinaciones}  ‚úÖ Agregado cl√°sico: {nombre} combinado con {nombre_test}")

        combinaciones.append(DatasetCombination(
            dataset_logico=dataset_logico,
            tipo_combination="clasico",
            ruta_train_csv=ruta_train_csv,
            ruta_test_csv=ruta_test_csv,
            tecnica_aumento=tecnica.lower(),
            valor_densidad=None,
            valor_riesgo=None,
            criterio_pureza=None,
            grado_limpieza=grado_limpieza,
            total_muestras_train=total_muestras_train,
            sinteticos_generados=sinteticos_generados,
            tamanio_dataset=tamanio_dataset
        ))

    # ==========================================================
    # 3) PC-SMOTE
    #
    # pcs_{dataset}_PRD{prd}_PR{pr}_CP{ent|prop}_UD{ddd}_{PE..|Upp...}_UR{ddd}_I{iso}_SV{sv}_SG{sg}_train.csv
    #
    # valor_densidad       -> radius-distance percentile (PRD)
    # valor_riesgo         -> risk percentile (PR)
    # criterio_pureza      -> "entropia" / "proporcion"
    # grado_limpieza       -> iso (I*)
    # semillas_validas     -> SV
    # sinteticos_generados -> SG
    # ==========================================================
    if verbose:
        print(f"üìÇ Explorando carpeta aumentados: {ruta_aumentados}")

    archivos_aumentados = sorted(os.listdir(ruta_aumentados))

    patron_pcsmote = re.compile(
        r"^pcs_(?P<dataset>.+?)_"
        r"PRD(?P<prd>\d+)_"
        r"PR(?P<pr>\d+)_"
        r"CP(?P<cp>(?:ent|prop))_"
        r"UD(?P<ud>\d{3})_"
        r"(?P<tipo_pureza>(?:PE\d+|Upp\d{3}))_"
        r"UR(?P<ur>\d{3})_"
        r"I(?P<iso>\d+)_"
        r"SV(?P<sv>\d+)_"
        r"SG(?P<sg>\d+)_train\.csv$"
    )

    for nombre in archivos_aumentados:
        if not nombre.endswith("_train.csv"):
            continue

        m = patron_pcsmote.match(nombre)
        if not m:
            if verbose:
                print(f"  ‚ö™ Omitido (no es pcs v√°lido): {nombre}")
            continue

        dataset_logico = m.group("dataset")
        valor_densidad = int(m.group("prd"))   # radius-distance percentile
        valor_riesgo = int(m.group("pr"))      # risk percentile
        cp_code = m.group("cp")                # "ent" | "prop"
        ud_str = m.group("ud")                 # density threshold, kept as the 3-digit string
        ur_str = m.group("ur")                 # risk threshold, kept as the 3-digit string
        tipo_pureza = m.group("tipo_pureza")   # PE.. / Upp... token
        grado_limpieza = int(m.group("iso"))   # I*
        semillas_validas = int(m.group("sv"))
        sinteticos_generados = int(m.group("sg"))

        if verbose:
            print(f"  ‚û°Ô∏è  Descifrado pcsmote: dataset={dataset_logico}, prd={valor_densidad}, pr={valor_riesgo}, cp={cp_code}, ud={ud_str}, ur={ur_str}, tipo_pureza={tipo_pureza}, I={grado_limpieza}, sv={semillas_validas}, sg={sinteticos_generados}")

        if cp_code == "ent":
            criterio_pureza = "entropia"
        else:
            criterio_pureza = "proporcion"

        # Configuration label. Two differences from the file name: UR comes before
        # the purity token here, and the int-parsed fields lose any leading zeros.
        nombre_configuracion = (
            f"PRD{valor_densidad}_"
            f"PR{valor_riesgo}_"
            f"CP{cp_code}_"
            f"UD{ud_str}_"
            f"UR{ur_str}_"
            f"{tipo_pureza}_"
            f"I{grado_limpieza}_"
            f"SV{semillas_validas}_"
            f"SG{sinteticos_generados}"
        )

        ruta_train_csv = os.path.join(ruta_aumentados, nombre)

        nombre_test, tamanio_dataset = _buscar_test_base(dataset_logico, archivos_base)
        if nombre_test is None:
            if verbose:
                print(f"  ‚ö†Ô∏è  No hay test base para dataset '{dataset_logico}', se omite {nombre}")
            continue

        ruta_test_csv = os.path.join(ruta_base, nombre_test)

        cont_combinaciones += 1
        if verbose:
            print(f"#{cont_combinaciones}  ‚úÖ Agregado pcsmote: {nombre} combinado con {nombre_test}")

        combinaciones.append(DatasetCombination(
            dataset_logico=dataset_logico,
            tipo_combination="pcsmote",
            ruta_train_csv=ruta_train_csv,
            ruta_test_csv=ruta_test_csv,
            tecnica_aumento="pcsmote",
            valor_densidad=valor_densidad,
            valor_riesgo=valor_riesgo,

            criterio_pureza=criterio_pureza,
            percentil_radio_distancia=valor_densidad,
            percentil_riesgo=valor_riesgo,
            umbral_densidad=ud_str,
            umbral_riesgo=ur_str,

            grado_limpieza=grado_limpieza,
            sinteticos_generados=sinteticos_generados,
            semillas_validas=semillas_validas,
            tipo_pureza=tipo_pureza,
            nombre_configuracion=nombre_configuracion,
            tamanio_dataset=tamanio_dataset
        ))

    if verbose:
        print(f"üìä Total combinaciones descubiertas: {len(combinaciones)}")

    return combinaciones


print("üîé Enumerando combinaciones base y aumentadas...")

# Scan the base, classic-resampler and PC-SMOTE folders for train/test pairings
combinaciones = enumerar_combinaciones_base_y_aumentadas(
    RUTA_DATASETS_BASE,
    RUTA_DATASETS_CLASICOS,
    RUTA_DATASETS_AUMENTADOS,
    verbose=True,
)

if len(combinaciones) == 0:
    print("‚ùå No se encontraron combinaciones de datasets.")


# Logical datasets that have at least one base (non-augmented) combination
datasets_con_base = {
    combinacion.dataset_logico
    for combinacion in combinaciones
    if combinacion.tipo_combination == "base"
}
if len(datasets_con_base) == 0:
    print("‚ùå No hay datasets base para comparar.")



🔎 Enumerando combinaciones base y aumentadas...
📂 Explorando carpeta base: ../datasets/datasets_aumentados/base/
#1  ✅ Agregado base: ecoli_I0_tm268_train.csv combinado con ecoli_tdataset336_tm68_test.csv
#2  ✅ Agregado base: ecoli_I1_tm262_train.csv combinado con ecoli_tdataset336_tm68_test.csv
#3  ✅ Agregado base: ecoli_I3_tm258_train.csv combinado con ecoli_tdataset336_tm68_test.csv
  ⚪ Omitido (no es *_train.csv): ecoli_tdataset336_tm68_test.csv
#4  ✅ Agregado base: gear_vibration_I0_tm72_train.csv combinado con gear_vibration_tdataset90_tm18_test.csv
#5  ✅ Agregado base: gear_vibration_I1_tm68_train.csv combinado con gear_vibration_tdataset90_tm18_test.csv
#6  ✅ Agregado base: gear_vibration_I3_tm68_train.csv combinado con gear_vibration_tdataset90_tm18_test.csv
  ⚪ Omitido (no es *_train.csv): gear_vibration_tdataset90_tm18_test.csv
#7  ✅ Agregado base: glass_I0_tm171_train.csv combinado con glass_tdataset214_tm43_test.csv
#8  ✅ Agregado base: glass_I1_t

In [4]:
# Logical datasets left out of the task plan. An entry that is commented out
# (currently "predict_faults") is the one that gets evaluated.
EXCLUIR_DATASETS = {
    "shuttle",
    "iris",
    "glass",
    "heart",
    "wdbc",
    "ecoli",
    "us_crime",
    # "predict_faults",
    "gear_vibration",
    "telco_churn",
}

def construir_lista_plana_de_tareas(dataset_combinations, orden_modelos,
                                    excluir_datasets=EXCLUIR_DATASETS, verbose=True):
    """Build the flat list of ``(model name, combination)`` tasks.

    Every model in *orden_modelos* is paired with every combination whose
    ``dataset_logico`` is not excluded. The comparison is case-insensitive on
    both sides.

    Note: the SVM/Shuttle policy does not apply here because this notebook only
    evaluates RandomForest.

    Args:
        dataset_combinations: iterable of objects exposing ``dataset_logico``.
            It is materialised once, so a generator is safe with several models.
        orden_modelos: iterable of model names, in execution order.
        excluir_datasets: dataset names to skip. ``None`` or an empty container
            excludes nothing; a single string is treated as one name.
        verbose: when true, print the planned/excluded counts.

    Returns:
        list of ``(nombre_modelo, combo)`` tuples, grouped by model.
    """
    # A bare string would otherwise be matched by substring
    if isinstance(excluir_datasets, str):
        excluir_datasets = (excluir_datasets,)

    # Normalise once: dataset names are lower-cased below, so the exclusions must be too
    excluidos_normalizados = {str(nombre).lower() for nombre in (excluir_datasets or ())}

    # Materialise so the inner loop can be replayed for every model
    combos = list(dataset_combinations)

    tareas = []
    excluidos_por_dataset = 0

    for nombre_modelo in orden_modelos:
        for combo in combos:
            if combo.dataset_logico.lower() in excluidos_normalizados:
                excluidos_por_dataset += 1
                continue

            tareas.append((nombre_modelo, combo))

    if verbose:
        print(f"üßÆ Tareas planificadas: {len(tareas)} (excluidos por dataset: {excluidos_por_dataset})")
    return tareas


def construir_estimador_y_espacio_random_forest():
    """Return the Random Forest pipeline and its hyperparameter search space.

    Returns:
        ``(estimator, space)``: a single-step ``Pipeline`` whose step is named
        ``'classifier'``, and a dict of discrete candidate values keyed by the
        pipeline parameter names.
    """
    bosque = RandomForestClassifier(
        n_estimators=150,
        criterion='gini',
        max_depth=None,
        min_samples_split=2,
        min_samples_leaf=1,
        max_features='sqrt',
        bootstrap=True,
        oob_score=False,
        class_weight=None,
        n_jobs=1,
        random_state=RANDOM_STATE,
    )
    pipeline = Pipeline([('classifier', bosque)])

    # Small, controlled space: 2 x 2 = 4 possible combinations
    espacio_busqueda = {
        "classifier__n_estimators": [150, 300],
        "classifier__max_features": ["sqrt", "log2"],
    }
    return pipeline, espacio_busqueda


# Model name -> zero-argument factory returning (estimator, search space)
REGISTRO_MODELOS = {
    "RandomForest": construir_estimador_y_espacio_random_forest,
}
# Order in which the registered models are scheduled
ORDEN_MODELOS = ["RandomForest"]


# Flat (model, combination) plan after applying the dataset exclusions
tareas = construir_lista_plana_de_tareas(
    dataset_combinations=combinaciones,
    orden_modelos=ORDEN_MODELOS,
    excluir_datasets=EXCLUIR_DATASETS,
    verbose=True
)

total_tareas = len(tareas)
print(f"üì¶ Total de tareas planificadas: {total_tareas}")


🧮 Tareas planificadas: 16 (excluidos por dataset: 1408)
📦 Total de tareas planificadas: 16


In [5]:
# Scoring para RandomizedSearchCV
# Metric passed as `refit` to RandomizedSearchCV
SCORING_REFIT = "f1_macro"
# Metrics passed as `scoring`; each key becomes a `mean_test_<key>` entry in cv_results_
SCORING_MULTIPLE = {
    "f1_macro": "f1_macro",
    "balanced_accuracy": "balanced_accuracy",
    "recall_macro": make_scorer(recall_score, average="macro"),
}


def _contar_combinaciones_posibles_en_space(space):
    """Count the grid size of a search space made only of discrete candidates.

    Returns the product of the candidate counts when every value of *space* is
    a list, tuple or ``np.ndarray`` (``1`` for an empty dict). Returns ``None``
    as soon as a value of any other type is found, e.g. a distribution object.
    """
    tipos_discretos = (list, tuple, np.ndarray)
    combinaciones = 1
    for candidatos in space.values():
        if not isinstance(candidatos, tipos_discretos):
            return None
        combinaciones = combinaciones * len(candidatos)
    return combinaciones


def ejecutar_rs_y_comparar_cv_con_test(
    estimator,
    space,
    X_train,
    y_train,
    X_test,
    y_test,
    configuracion_busqueda,
    verbose=0,
):
    """Run RandomizedSearchCV and score the refit best estimator on the test set.

    Args:
        estimator: estimator (or pipeline) to tune.
        space: parameter distributions handed to the search.
        X_train, y_train: data used for the cross-validated search.
        X_test, y_test: hold-out data scored with ``best_estimator_``.
        configuracion_busqueda: mapping with the keys ``"n_iter"``, ``"cv"``
            and ``"n_jobs"``.
        verbose: verbosity forwarded to RandomizedSearchCV.

    Returns:
        dict with ``mejores_params``, ``tiempo`` (seconds spent building and
        fitting the search), ``n_iter_efectivo``, ``n_combinaciones_posibles``
        and two metric dicts, ``cv`` and ``test``, each with the keys ``f1``,
        ``bacc`` and ``recall_macro``.
    """
    inicio = time.perf_counter()

    n_iter_solicitado = int(configuracion_busqueda["n_iter"])
    n_posibles = _contar_combinaciones_posibles_en_space(space)

    # For a fully discrete space, asking for more iterations than combinations is pointless
    n_iter_efectivo = (
        n_iter_solicitado
        if n_posibles is None
        else min(n_iter_solicitado, n_posibles)
    )

    busqueda = RandomizedSearchCV(
        estimator=estimator,
        param_distributions=space,
        n_iter=n_iter_efectivo,
        scoring=SCORING_MULTIPLE,
        refit=SCORING_REFIT,
        cv=configuracion_busqueda["cv"],
        random_state=RANDOM_STATE,
        n_jobs=configuracion_busqueda["n_jobs"],
        verbose=verbose,
    )
    busqueda.fit(X_train, y_train)
    duracion = time.perf_counter() - inicio

    # Mean CV scores of the winning candidate
    tabla_cv = busqueda.cv_results_
    indice_mejor = busqueda.best_index_
    metricas_cv = {
        "f1": float(tabla_cv["mean_test_f1_macro"][indice_mejor]),
        "bacc": float(tabla_cv["mean_test_balanced_accuracy"][indice_mejor]),
        "recall_macro": float(tabla_cv["mean_test_recall_macro"][indice_mejor]),
    }

    # Hold-out scores of the refit best estimator
    predicciones = busqueda.best_estimator_.predict(X_test)
    metricas_test = {
        "f1": float(f1_score(y_test, predicciones, average="macro")),
        "bacc": float(balanced_accuracy_score(y_test, predicciones)),
        "recall_macro": float(recall_score(y_test, predicciones, average="macro")),
    }

    return {
        "mejores_params": busqueda.best_params_,
        "tiempo": float(duracion),
        "n_iter_efectivo": int(n_iter_efectivo),
        "n_combinaciones_posibles": (int(n_posibles) if n_posibles is not None else None),
        "cv": metricas_cv,
        "test": metricas_test,
    }


In [6]:
# Requested number of search iterations; returned as "n_iter" by
# definir_configuracion_busqueda_para_dataset
N_ITER_BUSQUEDA_POR_DEFECTO = 4

# =========================
# Utilidades de datos
# =========================
def cargar_matriz_caracteristicas_y_etiquetas_desde_csv(ruta_csv):
    """Read a CSV and split it into a feature matrix and a label vector.

    The label column is ``target`` when present, otherwise the last column.

    Returns:
        ``(X, y)``: ``X`` is a float32 NumPy array with the remaining columns,
        ``y`` is the label column as a NumPy array with its original dtype.
    """
    tabla = pd.read_csv(ruta_csv)

    if "target" not in tabla.columns:
        # No explicit label column: take the last one by position
        caracteristicas = tabla.iloc[:, :-1]
        etiquetas = tabla.iloc[:, -1]
    else:
        caracteristicas = tabla.drop(columns=["target"])
        etiquetas = tabla["target"]

    X = caracteristicas.to_numpy(dtype=np.float32, copy=False)
    y = etiquetas.to_numpy()
    return X, y


def definir_configuracion_busqueda_para_dataset(X_train, nombre_dataset_logico, tipo_combination):
    """Choose the cross-validation setup for the hyperparameter search.

    Number of stratified folds:
      * Shuttle with a non-base combination -> 2
      * Shuttle (base) or at least 10000 training rows -> 3
      * anything else -> 5

    Returns:
        dict with ``"cv"`` (a shuffled ``StratifiedKFold`` seeded with
        ``RANDOM_STATE``), ``"n_iter"`` and ``"n_jobs"``.
    """
    n_muestras = X_train.shape[0]
    es_shuttle = nombre_dataset_logico.lower() == "shuttle"

    # Tuning runs only on base, so the 2-fold branch is normally not taken
    if es_shuttle and tipo_combination != "base":
        n_folds = 2
    elif es_shuttle or n_muestras >= 10000:
        n_folds = 3
    else:
        n_folds = 5

    validacion = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=RANDOM_STATE)

    return {
        "cv": validacion,
        "n_iter": N_ITER_BUSQUEDA_POR_DEFECTO,
        "n_jobs": 1,
    }


def entrenar_y_evaluar_con_params_fijos(
    estimator,
    mejores_params,
    X_train,
    y_train,
    X_test,
    y_test,
):
    """Fit *estimator* with fixed hyperparameters and score it on the test set.

    Args:
        estimator: estimator to configure and fit; it is modified in place.
        mejores_params: keyword parameters applied through ``set_params``.
        X_train, y_train: training data.
        X_test, y_test: evaluation data.

    Returns:
        dict with ``tiempo`` (seconds spent on set_params + fit + predict) and
        ``test``, a dict with the keys ``f1``, ``bacc`` and ``recall_macro``.
    """
    inicio = time.perf_counter()

    estimator.set_params(**mejores_params)
    estimator.fit(X_train, y_train)
    predicciones = estimator.predict(X_test)

    # Metric computation is deliberately left out of the timed section
    duracion = time.perf_counter() - inicio

    metricas_test = {
        "f1": float(f1_score(y_test, predicciones, average="macro")),
        "bacc": float(balanced_accuracy_score(y_test, predicciones)),
        "recall_macro": float(recall_score(y_test, predicciones, average="macro")),
    }

    return {
        "tiempo": float(duracion),
        "test": metricas_test,
    }


def _clave_contexto(combo, nombre_modelo):
    """Return the tuning-context key: (lower-cased dataset, model name, I as a string)."""
    dataset = combo.dataset_logico.lower()
    grado = str(combo.grado_limpieza)
    return (dataset, nombre_modelo, grado)


# =========================
# Step 1: TUNING ON BASE ONLY
# =========================
# (dataset, model, I) -> best hyperparameters and CV metrics found on the base train set
best_params_por_contexto = {}
ruta_best_params = os.path.join(DIRECTORIO_SALIDA, "best_params_por_contexto.json")
# Number of distinct contexts among the planned base tasks (used in progress/debug messages)
total_base = len(set(_clave_contexto(combo, nombre_modelo)
                     for nombre_modelo, combo in tareas
                     if combo.tipo_combination == "base"))

contador_base = 0

print("\n" + "="*100)
print("üéØ ETAPA 1: TUNING SOLO EN 'BASE' (por dataset + modelo + I)")
print("="*100)

for nombre_modelo, combo in tareas:

    # Only base combinations are tuned; augmented ones reuse the result in step 2
    if combo.tipo_combination != "base":
        continue

    clave = _clave_contexto(combo, nombre_modelo)

    # Each context is tuned at most once
    if clave in best_params_por_contexto:
        continue

    contador_base += 1

    print(f"\n{'='*80}")
    print(f"üéØ TUNING BASE [{contador_base}/{total_base}] Dataset: {combo.dataset_logico} | "
          f"Modelo: {nombre_modelo} | I={combo.grado_limpieza}")
    print(f"üìÇ Train(base): {os.path.basename(combo.ruta_train_csv)}")
    print(f"üìÇ Test (base): {os.path.basename(combo.ruta_test_csv)}")

    # Load the base train/test data; a read failure skips this context
    try:
        X_train, y_train = cargar_matriz_caracteristicas_y_etiquetas_desde_csv(combo.ruta_train_csv)
        X_test,  y_test  = cargar_matriz_caracteristicas_y_etiquetas_desde_csv(combo.ruta_test_csv)
    except Exception as e:
        print(f"‚ùå Error leyendo CSV base: {e}")
        continue

    configuracion_busqueda = definir_configuracion_busqueda_para_dataset(
        X_train, combo.dataset_logico, combo.tipo_combination
    )

    # Fresh estimator and search space for every context
    estimator, space = REGISTRO_MODELOS[nombre_modelo]()

    print(f"‚öôÔ∏è  RS(base): n_iter={configuracion_busqueda['n_iter']} (auto-ajustado dentro de ejecutar_rs), "
          f"folds={configuracion_busqueda['cv'].n_splits}, n_jobs={configuracion_busqueda['n_jobs']}")
    print("üöÄ Iniciando RandomizedSearchCV en BASE...")

    # A failed search is reported and the context is left without best params
    try:
        resultados = ejecutar_rs_y_comparar_cv_con_test(
            estimator, space, X_train, y_train, X_test, y_test,
            configuracion_busqueda=configuracion_busqueda,
            verbose=1
        )
    except Exception as e:
        print(f"‚ùå Error durante RS(base): {e}")
        continue

    best_params_por_contexto[clave] = {
        "mejores_params": resultados["mejores_params"],
        "cv_f1_macro": resultados["cv"]["f1"],
        "cv_balanced_accuracy": resultados["cv"]["bacc"],
        "cv_recall_macro": resultados["cv"]["recall_macro"],
    }


    # Incremental save in case the run is interrupted; tuple keys are stringified for JSON
    try:
        serializable = {str(k): v for k, v in best_params_por_contexto.items()}
        with open(ruta_best_params, "w", encoding="utf-8") as f:
            json.dump(serializable, f, ensure_ascii=False, indent=2)
    except Exception as e:
        print(f"‚ö†Ô∏è No se pudo guardar best_params JSON: {e}")

    print(f"‚úÖ RS(base) completado en {resultados['tiempo']:.2f} s | n_iter efectivo={resultados.get('n_iter_efectivo', '--')}")
    print(f"üìä BASE F1(CV): {resultados['cv']['f1']:.4f} | F1(Test): {resultados['test']['f1']:.4f}")
    print(f"üß† Best params: {resultados['mejores_params']}")


print(f"DEBUG: tunings ejecutados = {len(best_params_por_contexto)} | esperado = {total_base}")


# Step 2 cannot run without at least one tuned context
if not best_params_por_contexto:
    raise RuntimeError("‚ùå No se obtuvo ning√∫n best_params (no se encontr√≥ base o fall√≥ el tuning).")


# =========================
# Step 2: BULK EVALUATION with fixed params
# =========================
# One dict per evaluated task (RegistroRendimiento converted with asdict)
registros = []
inicio_total = time.perf_counter()

print("\n" + "="*100)
print("üèÅ ETAPA 2: EVALUACI√ìN MASIVA (params fijos) sobre TODOS los train CSV generados")
print("="*100)

for idx, (nombre_modelo, combo) in enumerate(tareas, start=1):

    print(f"\n{'='*80}")
    print(f"üèÅ [{idx}/{total_tareas}] Dataset: {combo.dataset_logico} | "
          f"Tipo: {combo.tipo_combination} | Modelo: {nombre_modelo}")
    print(f"üìÇ Train: {os.path.basename(combo.ruta_train_csv)}")

    # Reuse the hyperparameters tuned on base for the same (dataset, model, I);
    # tasks whose context has no tuning result are skipped
    clave = _clave_contexto(combo, nombre_modelo)
    if clave not in best_params_por_contexto:
        print(f"‚ö†Ô∏è Sin best_params para {clave}. Se omite.")
        continue

    contexto = best_params_por_contexto[clave]
    mejores_params = contexto["mejores_params"]

    # Load train/test for this specific combination
    try:
        X_train, y_train = cargar_matriz_caracteristicas_y_etiquetas_desde_csv(combo.ruta_train_csv)
        X_test,  y_test  = cargar_matriz_caracteristicas_y_etiquetas_desde_csv(combo.ruta_test_csv)
    except Exception as e:
        print(f"‚ùå Error leyendo CSV: {e}")
        continue

    # Fresh estimator per task; the search space is not needed here
    estimator, _space = REGISTRO_MODELOS[nombre_modelo]()

    # Train + evaluate (no randomized search)
    try:
        resultados = entrenar_y_evaluar_con_params_fijos(
            estimator=estimator,
            mejores_params=mejores_params,
            X_train=X_train,
            y_train=y_train,
            X_test=X_test,
            y_test=y_test,
        )
    except Exception as e:
        print(f"‚ùå Error durante fit/eval params fijos: {e}")
        continue

    print(f"‚úÖ Eval completada en {resultados['tiempo']:.2f} s")
    print(f"üìä F1(Test): {resultados['test']['f1']:.4f}")

    # Record the results.
    # NOTE(review): cv_f1_macro is copied from the base tuning context, while
    # cv_balanced_accuracy and cv_recall_macro are written as NaN even though the
    # context stores both - confirm which behaviour is intended.
    registros.append(asdict(RegistroRendimiento(
        dataset_logico=combo.dataset_logico,
        tipo_combination=combo.tipo_combination,
        nombre_modelo_aprendizaje=nombre_modelo,
        nombre_configuracion=combo.nombre_configuracion,
        tecnica_aumento=combo.tecnica_aumento,
        valor_densidad=str(combo.valor_densidad),
        valor_riesgo=str(combo.valor_riesgo),
        criterio_pureza=str(combo.criterio_pureza),
        grado_limpieza=str(combo.grado_limpieza),

        cantidad_train=int(X_train.shape[0]),
        cantidad_test=int(X_test.shape[0]),
        cantidad_caracteristicas=int(X_train.shape[1]),

        cv_f1_macro=round(contexto["cv_f1_macro"], 3),
        cv_balanced_accuracy=float("nan"),
        cv_recall_macro=float("nan"),

        test_f1_macro=round(resultados["test"]["f1"], 3),
        test_balanced_accuracy=round(resultados["test"]["bacc"], 3),
        test_recall_macro=round(resultados["test"]["recall_macro"], 3),

        mejores_hiperparametros=str(mejores_params),
        tiempo_busqueda_seg=float(resultados["tiempo"]),  # now holds fit+predict time
    )))

    # Explicit collection between tasks
    gc.collect()

fin_total = time.perf_counter() - inicio_total
print(f"\n‚è±Ô∏è Tiempo total etapa train/test (params fijos): {fin_total/60:.2f} min")



🎯 ETAPA 1: TUNING SOLO EN 'BASE' (por dataset + modelo + I)

🎯 TUNING BASE [1/3] Dataset: predict_faults | Modelo: RandomForest | I=0
📂 Train(base): predict_faults_I0_tm8000_train.csv
📂 Test (base): predict_faults_tdataset10000_tm2000_test.csv
⚙️  RS(base): n_iter=4 (auto-ajustado dentro de ejecutar_rs), folds=5, n_jobs=1
🚀 Iniciando RandomizedSearchCV en BASE...
Fitting 5 folds for each of 4 candidates, totalling 20 fits
✅ RS(base) completado en 59.60 s | n_iter efectivo=4
📊 BASE F1(CV): 0.5086 | F1(Test): 0.5289
🧠 Best params: {'classifier__n_estimators': 150, 'classifier__max_features': 'sqrt'}

🎯 TUNING BASE [2/3] Dataset: predict_faults | Modelo: RandomForest | I=1
📂 Train(base): predict_faults_I1_tm7917_train.csv
📂 Test (base): predict_faults_tdataset10000_tm2000_test.csv
⚙️  RS(base): n_iter=4 (auto-ajustado dentro de ejecutar_rs), folds=5, n_jobs=1
🚀 Iniciando RandomizedSearchCV en BASE...
Fitting 5 folds for each of 4 candidates, tot

In [8]:
# =========================
# FINAL EXPORT TO EXCEL (debug + correct matching)
# =========================
inicio_total = time.perf_counter()

# One row per evaluated task, with a clean 0..n-1 index
df_resultados = pd.DataFrame(registros).copy()
df_resultados.reset_index(drop=True, inplace=True)

# Normalise dtypes of the exported test metrics; values that cannot be parsed become NaN
for col in ["test_f1_macro", "test_balanced_accuracy", "test_recall_macro"]:
    if col in df_resultados.columns:
        df_resultados[col] = pd.to_numeric(df_resultados[col], errors="coerce")

def _tomar_una_fila(df_filtrado):
    """Pick a single row out of an already-filtered results table.

    When re-executions left duplicate rows, the LAST one is taken, since it
    is the closest to the latest run log. (To prefer "best by F1" instead,
    this is the place to change.)

    Returns the last row as a Series, or None when `df_filtrado` is None or
    has no rows.
    """
    if df_filtrado is not None and len(df_filtrado) > 0:
        return df_filtrado.iloc[-1]
    return None

def _a_float(valor):
    """Convert a scalar to float, mapping missing values (None/NaN) to NaN."""
    return float(valor) if pd.notna(valor) else np.nan


def _a_entero(valor, por_defecto=0):
    """Convert a scalar to int, returning `por_defecto` for missing values.

    `int(float("nan"))` raises ValueError, and a results column that exists
    but holds NaN for this row bypasses the default passed to `Series.get`,
    so the missing-value check has to happen here.
    """
    return int(valor) if pd.notna(valor) else por_defecto


resultados_formateados = []

# ---- DEBUG counters ----
dbg_total = 0
dbg_ok = 0
dbg_empty = 0

for comb in combinaciones:

    dbg_total += 1

    # 1) PCSMOTE: match on nombre_configuracion (the only truly unique field)
    if comb.tipo_combination == "pcsmote":
        df_fila = df_resultados[
            (df_resultados["tipo_combination"] == "pcsmote") &
            (df_resultados["nombre_configuracion"] == comb.nombre_configuracion)
        ]

    # 2) CLASSIC resamplers: match on dataset + I + tecnica_aumento
    #    (smote/adasyn/borderlinesmote)
    elif comb.tipo_combination == "clasico":
        df_fila = df_resultados[
            (df_resultados["tipo_combination"] == "clasico") &
            (df_resultados["dataset_logico"] == comb.dataset_logico) &
            (df_resultados["grado_limpieza"].astype(str) == str(comb.grado_limpieza)) &
            (df_resultados["tecnica_aumento"] == comb.tecnica_aumento)
        ]

    # 3) BASE: match on dataset + I
    else:  # "base"
        df_fila = df_resultados[
            (df_resultados["tipo_combination"] == "base") &
            (df_resultados["dataset_logico"] == comb.dataset_logico) &
            (df_resultados["grado_limpieza"].astype(str) == str(comb.grado_limpieza))
        ]

    fila = _tomar_una_fila(df_fila)
    if fila is None:
        dbg_empty += 1
        # Targeted debug output, capped at 25 lines to avoid flooding the log
        if dbg_empty <= 25:
            print(
                f"[DEBUG export] SIN MATCH | tipo={comb.tipo_combination} "
                f"ds={comb.dataset_logico} I={comb.grado_limpieza} "
                f"tec={comb.tecnica_aumento} cfg={comb.nombre_configuracion}"
            )
        continue

    dbg_ok += 1
    tecnica = str(fila.get("tecnica_aumento", "base"))

    # Administrative fields: which ones apply depends on the combination type
    if comb.tipo_combination == "pcsmote":
        semillas_candidatas = comb.semillas_validas
        sinteticos_generados = comb.sinteticos_generados
    elif comb.tipo_combination == "clasico":
        semillas_candidatas = np.nan
        sinteticos_generados = comb.sinteticos_generados
    else:
        semillas_candidatas = np.nan
        sinteticos_generados = np.nan

    resultados_formateados.append({
        "tecnica": tecnica,
        "dataset": comb.dataset_logico,

        # "--" is exported when the combination object lacks the attribute
        "percentil_radio_distancia": getattr(comb, "percentil_radio_distancia", "--"),
        "percentil_riesgo": getattr(comb, "percentil_riesgo", "--"),
        "criterio_pureza": getattr(comb, "criterio_pureza", "--"),
        "umbral_densidad": getattr(comb, "umbral_densidad", "--"),
        "umbral_riesgo": getattr(comb, "umbral_riesgo", "--"),
        "tipo_pureza": getattr(comb, "tipo_pureza", "--"),

        "grado_isolation_forest": str(comb.grado_limpieza),
        # NA-safe: a NaN count falls back to 0 instead of raising ValueError
        "test_n": _a_entero(fila.get("cantidad_test", 0)),
        "train_n": _a_entero(fila.get("cantidad_train", 0)),

        "semillas_candidatas_train": _a_float(semillas_candidatas),
        "sinteticas_generadas": _a_float(sinteticos_generados),

        # Both F1 columns use the same missing-value handling
        "f1_macro_cv": _a_float(fila.get("cv_f1_macro", np.nan)),
        "f1_macro_test": _a_float(fila.get("test_f1_macro", np.nan)),
        # Optional columns, currently not exported:
        # "balanced_accuracy_test": float(fila.get("test_balanced_accuracy", np.nan)) if pd.notna(fila.get("test_balanced_accuracy", np.nan)) else np.nan,
        # "recall_macro_test": float(fila.get("test_recall_macro", np.nan)) if pd.notna(fila.get("test_recall_macro", np.nan)) else np.nan,

        # "mejores_hiperparametros": fila.get("mejores_hiperparametros", ""),
        # "nombre_configuracion": comb.nombre_configuracion if comb.tipo_combination == "pcsmote" else Path(comb.ruta_train_csv).stem,
        # "tipo_combination": comb.tipo_combination,
    })

print(f"[DEBUG export] total_combinaciones={dbg_total} ok={dbg_ok} sin_match={dbg_empty}")

# Write one row per matched combination to the Excel report
df_export = pd.DataFrame(resultados_formateados)
df_export.to_excel(NOMBRE_ARCHIVO_EXCEL, index=False)

duracion = round(time.perf_counter() - inicio_total, 2)
print(f"‚úÖ Export Excel final OK: {NOMBRE_ARCHIVO_EXCEL}")
print(f"üìä Filas exportadas: {len(df_export)}")
print(f"‚è±Ô∏è Duraci√≥n export: {duracion} seg")


[DEBUG export] SIN MATCH | tipo=base ds=ecoli I=0 tec=base cfg=
[DEBUG export] SIN MATCH | tipo=base ds=ecoli I=1 tec=base cfg=
[DEBUG export] SIN MATCH | tipo=base ds=ecoli I=3 tec=base cfg=
[DEBUG export] SIN MATCH | tipo=base ds=gear_vibration I=0 tec=base cfg=
[DEBUG export] SIN MATCH | tipo=base ds=gear_vibration I=1 tec=base cfg=
[DEBUG export] SIN MATCH | tipo=base ds=gear_vibration I=3 tec=base cfg=
[DEBUG export] SIN MATCH | tipo=base ds=glass I=0 tec=base cfg=
[DEBUG export] SIN MATCH | tipo=base ds=glass I=1 tec=base cfg=
[DEBUG export] SIN MATCH | tipo=base ds=glass I=3 tec=base cfg=
[DEBUG export] SIN MATCH | tipo=base ds=heart I=0 tec=base cfg=
[DEBUG export] SIN MATCH | tipo=base ds=heart I=1 tec=base cfg=
[DEBUG export] SIN MATCH | tipo=base ds=heart I=3 tec=base cfg=
[DEBUG export] SIN MATCH | tipo=base ds=telco_churn I=0 tec=base cfg=
[DEBUG export] SIN MATCH | tipo=base ds=telco_churn I=10 tec=base cfg=
[DEBUG export] SIN MATCH | tipo=base ds=telco_churn I=1 tec=base

In [None]:
# pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp070_UR050_I3_SV030_SG22250_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD050_Upp060_UR040_I0_SV022_SG22938_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD050_Upp060_UR040_I1_SV022_SG22707_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD050_Upp060_UR040_I3_SV024_SG22250_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD050_Upp060_UR050_I0_SV022_SG22938_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD050_Upp060_UR050_I1_SV022_SG22707_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD050_Upp060_UR050_I3_SV024_SG22250_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD050_Upp070_UR040_I0_SV022_SG22938_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD050_Upp070_UR040_I1_SV022_SG22707_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD050_Upp070_UR040_I3_SV024_SG22250_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD050_Upp070_UR050_I0_SV022_SG22938_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD050_Upp070_UR050_I1_SV022_SG22707_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD050_Upp070_UR050_I3_SV024_SG22250_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD060_Upp060_UR040_I0_SV018_SG22938_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD060_Upp060_UR040_I1_SV015_SG22707_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD060_Upp060_UR040_I3_SV018_SG22250_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD060_Upp060_UR050_I0_SV018_SG22938_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD060_Upp060_UR050_I1_SV015_SG22707_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD060_Upp060_UR050_I3_SV018_SG22250_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD060_Upp070_UR040_I0_SV018_SG22938_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD060_Upp070_UR040_I1_SV015_SG22707_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD060_Upp070_UR040_I3_SV018_SG22250_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD060_Upp070_UR050_I0_SV018_SG22938_train.csv pcs_predict_faults_PRD70_PR30_CPprop_UD060_Upp070_UR050_I1_SV015_SG22707_train.csv 
pcs_predict_faults_PRD70_PR30_CPprop_UD060_Upp070_UR050_I3_SV018_SG22250_train.csv pcs_predict_faults_PRD70_PR40_CPent_UD050_PE70_UR045_I0_SV040_SG22938_train.csv pcs_predict_faults_PRD70_PR40_CPent_UD050_PE70_UR045_I1_SV040_SG22707_train.csv pcs_predict_faults_PRD70_PR40_CPent_UD060_PE70_UR045_I0_SV036_SG22938_train.csv pcs_predict_faults_PRD70_PR40_CPent_UD060_PE70_UR045_I1_SV033_SG22707_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD050_Upp060_UR040_I0_SV022_SG22938_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD050_Upp060_UR040_I1_SV022_SG22707_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD050_Upp060_UR040_I3_SV024_SG22250_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD050_Upp060_UR050_I0_SV022_SG22938_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD050_Upp060_UR050_I1_SV022_SG22707_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD050_Upp060_UR050_I3_SV024_SG22250_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD050_Upp070_UR040_I0_SV022_SG22938_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD050_Upp070_UR040_I1_SV022_SG22707_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD050_Upp070_UR040_I3_SV024_SG22250_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD050_Upp070_UR050_I0_SV022_SG22938_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD050_Upp070_UR050_I1_SV022_SG22707_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD050_Upp070_UR050_I3_SV024_SG22250_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD060_Upp060_UR040_I0_SV018_SG22938_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD060_Upp060_UR040_I1_SV015_SG22707_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD060_Upp060_UR040_I3_SV018_SG22250_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD060_Upp060_UR050_I0_SV018_SG22938_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD060_Upp060_UR050_I1_SV015_SG22707_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD060_Upp060_UR050_I3_SV018_SG22250_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD060_Upp070_UR040_I0_SV018_SG22938_train.csv 
pcs_predict_faults_PRD70_PR40_CPprop_UD060_Upp070_UR040_I1_SV015_SG22707_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD060_Upp070_UR040_I3_SV018_SG22250_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD060_Upp070_UR050_I0_SV018_SG22938_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD060_Upp070_UR050_I1_SV015_SG22707_train.csv pcs_predict_faults_PRD70_PR40_CPprop_UD060_Upp070_UR050_I3_SV018_SG22250_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD050_Upp060_UR040_I0_SV022_SG22938_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD050_Upp060_UR040_I1_SV022_SG22707_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD050_Upp060_UR040_I3_SV024_SG22250_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD050_Upp060_UR050_I0_SV022_SG22938_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD050_Upp060_UR050_I1_SV022_SG22707_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD050_Upp060_UR050_I3_SV024_SG22250_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD050_Upp070_UR040_I0_SV022_SG22938_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD050_Upp070_UR040_I1_SV022_SG22707_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD050_Upp070_UR040_I3_SV024_SG22250_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD050_Upp070_UR050_I0_SV022_SG22938_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD050_Upp070_UR050_I1_SV022_SG22707_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD050_Upp070_UR050_I3_SV024_SG22250_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD060_Upp060_UR040_I0_SV018_SG22938_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD060_Upp060_UR040_I1_SV015_SG22707_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD060_Upp060_UR040_I3_SV018_SG22250_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD060_Upp060_UR050_I0_SV018_SG22938_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD060_Upp060_UR050_I1_SV015_SG22707_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD060_Upp060_UR050_I3_SV018_SG22250_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD060_Upp070_UR040_I0_SV018_SG22938_train.csv 
pcs_predict_faults_PRD70_PR50_CPprop_UD060_Upp070_UR040_I1_SV015_SG22707_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD060_Upp070_UR040_I3_SV018_SG22250_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD060_Upp070_UR050_I0_SV018_SG22938_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD060_Upp070_UR050_I1_SV015_SG22707_train.csv pcs_predict_faults_PRD70_PR50_CPprop_UD060_Upp070_UR050_I3_SV018_SG22250_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD050_Upp060_UR040_I0_SV030_SG22938_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD050_Upp060_UR040_I1_SV032_SG22707_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD050_Upp060_UR040_I3_SV034_SG22250_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD050_Upp060_UR050_I0_SV030_SG22938_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD050_Upp060_UR050_I1_SV032_SG22707_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD050_Upp060_UR050_I3_SV034_SG22250_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD050_Upp070_UR040_I0_SV030_SG22938_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD050_Upp070_UR040_I1_SV032_SG22707_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD050_Upp070_UR040_I3_SV034_SG22250_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD050_Upp070_UR050_I0_SV030_SG22938_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD050_Upp070_UR050_I1_SV032_SG22707_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD050_Upp070_UR050_I3_SV034_SG22250_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD060_Upp060_UR040_I0_SV029_SG22938_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD060_Upp060_UR040_I1_SV028_SG22707_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD060_Upp060_UR040_I3_SV030_SG22250_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD060_Upp060_UR050_I0_SV029_SG22938_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD060_Upp060_UR050_I1_SV028_SG22707_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD060_Upp060_UR050_I3_SV030_SG22250_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD060_Upp070_UR040_I0_SV029_SG22938_train.csv 
pcs_predict_faults_PRD85_PR30_CPprop_UD060_Upp070_UR040_I1_SV028_SG22707_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD060_Upp070_UR040_I3_SV030_SG22250_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD060_Upp070_UR050_I0_SV029_SG22938_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD060_Upp070_UR050_I1_SV028_SG22707_train.csv pcs_predict_faults_PRD85_PR30_CPprop_UD060_Upp070_UR050_I3_SV030_SG22250_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD050_PE70_UR045_I0_SV048_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD050_PE70_UR045_I1_SV050_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD060_PE70_UR045_I0_SV047_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD060_PE70_UR045_I1_SV046_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp060_UR040_I0_SV030_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp060_UR040_I1_SV032_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp060_UR040_I3_SV034_SG22250_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp060_UR050_I0_SV030_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp060_UR050_I1_SV032_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp060_UR050_I3_SV034_SG22250_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp070_UR040_I0_SV030_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp070_UR040_I1_SV032_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp070_UR040_I3_SV034_SG22250_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp070_UR050_I0_SV030_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp070_UR050_I1_SV032_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp070_UR050_I3_SV034_SG22250_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp060_UR040_I0_SV029_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp060_UR040_I1_SV028_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp060_UR040_I3_SV030_SG22250_train.csv 
pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp060_UR050_I0_SV029_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp060_UR050_I1_SV028_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp060_UR050_I3_SV030_SG22250_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp070_UR040_I0_SV029_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp070_UR040_I1_SV028_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp070_UR040_I3_SV030_SG22250_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp070_UR050_I0_SV029_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp070_UR050_I1_SV028_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp070_UR050_I3_SV030_SG22250_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp060_UR040_I0_SV030_SG22938_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp060_UR040_I1_SV032_SG22707_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp060_UR040_I3_SV034_SG22250_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp060_UR050_I0_SV030_SG22938_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp060_UR050_I1_SV032_SG22707_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp060_UR050_I3_SV034_SG22250_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp070_UR040_I0_SV030_SG22938_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp070_UR040_I1_SV032_SG22707_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp070_UR040_I3_SV034_SG22250_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp070_UR050_I0_SV030_SG22938_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp070_UR050_I1_SV032_SG22707_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp070_UR050_I3_SV034_SG22250_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp060_UR040_I0_SV029_SG22938_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp060_UR040_I1_SV028_SG22707_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp060_UR040_I3_SV030_SG22250_train.csv 
pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp060_UR050_I0_SV029_SG22938_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp060_UR050_I1_SV028_SG22707_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp060_UR050_I3_SV030_SG22250_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp070_UR040_I0_SV029_SG22938_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp070_UR040_I1_SV028_SG22707_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp070_UR040_I3_SV030_SG22250_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp070_UR050_I0_SV029_SG22938_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp070_UR050_I1_SV028_SG22707_train.csv

In [None]:
# pcs_predict_faults_PRD85_PR40_CPent_UD050_PE60_UR045_I0_SV030_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD050_PE60_UR045_I1_SV031_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD050_PE70_UR045_I0_SV048_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD050_PE70_UR045_I1_SV050_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD050_PE80_UR045_I0_SV048_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD050_PE80_UR045_I1_SV050_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD050_PE90_UR045_I0_SV051_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD050_PE90_UR045_I1_SV054_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD060_PE60_UR045_I0_SV029_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD060_PE60_UR045_I1_SV027_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD060_PE70_UR045_I0_SV047_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD060_PE70_UR045_I1_SV046_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD060_PE80_UR045_I0_SV047_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD060_PE80_UR045_I1_SV046_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD060_PE90_UR045_I0_SV050_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPent_UD060_PE90_UR045_I1_SV049_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp045_UR045_I0_SV051_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp045_UR045_I1_SV054_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp045_UR045_I3_SV055_SG22250_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp045_UR055_I0_SV051_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp045_UR055_I1_SV054_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp045_UR055_I3_SV055_SG22250_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp055_UR045_I0_SV051_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp055_UR045_I1_SV054_SG22707_train.csv 
pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp055_UR045_I3_SV055_SG22250_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp055_UR055_I0_SV051_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp055_UR055_I1_SV054_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD050_Upp055_UR055_I3_SV055_SG22250_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp045_UR045_I0_SV050_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp045_UR045_I1_SV049_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp045_UR045_I3_SV051_SG22250_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp045_UR055_I0_SV050_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp045_UR055_I1_SV049_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp045_UR055_I3_SV051_SG22250_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp055_UR045_I0_SV050_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp055_UR045_I1_SV049_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp055_UR045_I3_SV051_SG22250_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp055_UR055_I0_SV050_SG22938_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp055_UR055_I1_SV049_SG22707_train.csv pcs_predict_faults_PRD85_PR40_CPprop_UD060_Upp055_UR055_I3_SV051_SG22250_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp045_UR045_I0_SV051_SG22938_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp045_UR045_I1_SV054_SG22707_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp045_UR045_I3_SV055_SG22250_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp045_UR055_I0_SV051_SG22938_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp045_UR055_I1_SV054_SG22707_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp045_UR055_I3_SV055_SG22250_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp055_UR045_I0_SV051_SG22938_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp055_UR045_I1_SV054_SG22707_train.csv 
pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp055_UR045_I3_SV055_SG22250_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp055_UR055_I0_SV051_SG22938_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp055_UR055_I1_SV054_SG22707_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD050_Upp055_UR055_I3_SV055_SG22250_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp045_UR045_I0_SV050_SG22938_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp045_UR045_I1_SV049_SG22707_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp045_UR045_I3_SV051_SG22250_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp045_UR055_I0_SV050_SG22938_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp045_UR055_I1_SV049_SG22707_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp045_UR055_I3_SV051_SG22250_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp055_UR045_I0_SV050_SG22938_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp055_UR045_I1_SV049_SG22707_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp055_UR045_I3_SV051_SG22250_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp055_UR055_I0_SV050_SG22938_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp055_UR055_I1_SV049_SG22707_train.csv pcs_predict_faults_PRD85_PR50_CPprop_UD060_Upp055_UR055_I3_SV051_SG22250_train.csv