In [4]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from datetime import datetime
import json

# Parámetros
TEST_SIZE = 0.2
RANDOM_STATE = 42

def create_train_test_split(input_path, output_path, norm_name, selector_name):
    """
    Create train-test splits for a specific normalization and selector combination
    """
    print(f"\nProcesando {norm_name} - {selector_name}")
    
    # Determinar si es versión full o selected
    file_suffix = "full" if selector_name == "Full" else "selected"
    
    # Leer el dataset
    input_file = os.path.join(input_path, f"{norm_name}_{selector_name}_{file_suffix}.parquet")
    df = pd.read_parquet(input_file)
    
    # Separar features y target
    X = df.drop(columns=['nivel_triage'])
    y = df['nivel_triage']
    
    # Crear split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, 
        test_size=TEST_SIZE,
        random_state=RANDOM_STATE,
        stratify=y
    )
    
    # Crear DataFrames para y_train y y_test
    y_train_df = pd.DataFrame(y_train, columns=['nivel_triage'])
    y_test_df = pd.DataFrame(y_test, columns=['nivel_triage'])
    
    # Crear carpeta de salida
    split_output_path = os.path.join(output_path, f"{norm_name}_{selector_name}")
    os.makedirs(split_output_path, exist_ok=True)
    
    # Guardar archivos con nombres que incluyen la variante
    X_train.to_parquet(
        os.path.join(split_output_path, f"X_train_{norm_name}_{selector_name}.parquet"),
        index=False
    )
    X_test.to_parquet(
        os.path.join(split_output_path, f"X_test_{norm_name}_{selector_name}.parquet"),
        index=False
    )
    y_train_df.to_parquet(
        os.path.join(split_output_path, f"y_train_{norm_name}_{selector_name}.parquet"),
        index=False
    )
    y_test_df.to_parquet(
        os.path.join(split_output_path, f"y_test_{norm_name}_{selector_name}.parquet"),
        index=False
    )
    
    print(f"✅ Archivos guardados en: {split_output_path}")
    print(f"   Train shape: {X_train.shape}")
    print(f"   Test shape: {X_test.shape}")

def main():
    # Configuración de paths
    base_path = os.path.dirname(os.getcwd())
    config_path = os.path.join(base_path, "config.json")
    
    with open(config_path, "r") as f:
        config = json.load(f)
    
    # Crear timestamp para la carpeta de salida
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    
    # Paths de entrada y salida
    experiment_input_path = os.path.join(
        base_path, 
        config["paths"]["outputs"],
        "experiments",
        "experimento_final_20250131_210835"  # Ajusta esto según tu última corrida de feature selection
    )
    
    train_test_output_path = os.path.join(
        base_path,
        config["paths"]["outputs"],
        "experiments",
        f"train_test_{timestamp}"
    )
    
    # Crear carpeta de salida
    os.makedirs(train_test_output_path, exist_ok=True)
    
    # Normalizaciones y selectores disponibles
    normalizations = ["MaxAbs", "MinMax", "NoNorm", "Robust", "Standard"]
    selectors = ["Linear", "Nonlinear", "Model-Based", "Full"]  # Añadido "Full" a los selectores
    
    # Procesar cada combinación
    for norm in normalizations:
        norm_path = os.path.join(experiment_input_path, norm)
        if not os.path.exists(norm_path):
            print(f"⚠️ Carpeta no encontrada: {norm_path}")
            continue
            
        for selector in selectors:
            try:
                create_train_test_split(
                    input_path=norm_path,
                    output_path=train_test_output_path,
                    norm_name=norm,
                    selector_name=selector
                )
            except Exception as e:
                print(f"❌ Error procesando {norm} - {selector}: {str(e)}")

if __name__ == "__main__":
    main()


Procesando MaxAbs - Linear
✅ Archivos guardados en: c:\Users\Administrador\Documents\PythonScripts\Tesis\tesisaustral\outputs\experiments\train_test_20250201_113954\MaxAbs_Linear
   Train shape: (448388, 10)
   Test shape: (112098, 10)

Procesando MaxAbs - Nonlinear
✅ Archivos guardados en: c:\Users\Administrador\Documents\PythonScripts\Tesis\tesisaustral\outputs\experiments\train_test_20250201_113954\MaxAbs_Nonlinear
   Train shape: (448388, 72)
   Test shape: (112098, 72)

Procesando MaxAbs - Model-Based
✅ Archivos guardados en: c:\Users\Administrador\Documents\PythonScripts\Tesis\tesisaustral\outputs\experiments\train_test_20250201_113954\MaxAbs_Model-Based
   Train shape: (448388, 40)
   Test shape: (112098, 40)

Procesando MaxAbs - Full
❌ Error procesando MaxAbs - Full: [Errno 2] No such file or directory: 'c:\\Users\\Administrador\\Documents\\PythonScripts\\Tesis\\tesisaustral\\outputs\\experiments\\experimento_final_20250131_210835\\MaxAbs\\MaxAbs_Full_full.parquet'

Procesando