In [9]:
import pandas as pd
import os
import json
from sklearn.preprocessing import MinMaxScaler, StandardScaler, MaxAbsScaler, RobustScaler

In [13]:
# Load the notebook configuration.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying on
# the platform default encoding (a legacy code page on Windows), which would
# corrupt any non-ASCII characters in the config.
with open("config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

# Project root is taken to be the parent of the current working directory.
base_path = os.path.dirname(os.getcwd())
data_path = os.path.join(base_path, config["paths"]["intermediate"])
# NOTE(review): this resolves to the same folder as data_path (same config key);
# the "normalized" subfolder is only appended later, when output files are written.
normalized_path = os.path.join(base_path, config["paths"]["intermediate"])
os.makedirs(normalized_path, exist_ok=True)

# Load the original dataset
dataset_file = os.path.join(data_path, config["parameters"]["02_Normalization"]["dataset_file"])
df = pd.read_parquet(dataset_file)

# Columns to normalize (names must match the dataset's column labels exactly)
columns_to_normalize = [
    "n_visita_urgencia", "n_admisiones", "n_cirugias", "med_combinacion_analgesica_y_antihistaminica",
    "med_analgesicos", "med_anestesicos", "med_anti-obesidad", "med_antialergia", "med_antiartriticos",
    "med_antiasma", "med_antibioticos", "med_anticoagulantes", "med_antidotos", "med_antifungicos",
    "med_combinacion_de_antihistaminico_y_descongestionante", "med_antihistaminicos", "med_antihiperglucemicos",
    "med_antiinfecciosos", "med_antiinfectives/varios", "med_antineopultimoics", "med_antiparkinsondrogas",
    "med_antiplaquetarios", "med_antivirales", "med_autonomas", "med_cardiacos", "med_cardiovasculares",
    "med_cns", "med_anticonceptivos", "med_diureticos", "med_gastrointestinales", "med_inmunosupresores",
    "med_investigacional", "med_relajantes_musculares", "med_pre-natalvitaminas", "med_psicoterapeuticos",
    "med_sedantes/hipnoticos", "med_vitaminas", "cxr_cuenta", "cuenta_echo", "recuento_electrocardiograma",
    "headct_cuenta", "mri_cuenta", "otroct_cuenta", "otroimg_cuenta", "otrosus_cuenta", "otroxr_cuenta",
    "edad", "absolutamentemphcitocuenta_ultimo", "albumina_ultima", "anc(absneutrophilcuenta)_ultimo",
    "aniongap_ultimo", "aspartatoaminotransferasa(ast)_ultimo", "b-typenatriureticpeptidepro(probnp)_ultimo",
    "bilirrubinadirect_ultimo", "panecillo_ultimo", "bollo/creatratio_ultimo", "calcio_ultimo", "cloruro_ultimo",
    "creatinina_ultimo", "dimero_d_ultimo", "egfr_ultimo", "glucosa_ultima", "hematocrito_ultimo",
    "hemoglobina_ultima", "inr_ultimo", "lactatepoc_ultimo", "plaquetas_ultimo", "potasio_ultimo",
    "sodio_ultimo", "troponini(poc)_ultimo", "wbc_ultimo"
]

In [14]:
# Normalization methods to compare; "none" keeps the original values as a baseline
scalers = {
    "maxabs": MaxAbsScaler(),
    "minmax": MinMaxScaler(),
    "standard": StandardScaler(),
    "robust": RobustScaler(),
    "none": None  # No normalization
}

# Output files go into a "normalized" subfolder. Create it up front: only its
# parent folder is created earlier in the notebook, so on a fresh checkout the
# parquet write would fail because the target directory does not exist.
output_dir = os.path.join(normalized_path, "normalized")
os.makedirs(output_dir, exist_ok=True)

# Apply each normalization and save one parquet file per method
for method, scaler in scalers.items():
    # Work on a copy so every method starts from the untouched original data
    df_normalized = df.copy()
    if scaler is not None:
        # NOTE(review): the scaler is fit on the whole dataset; if a train/test
        # split happens downstream this leaks test-set statistics — confirm.
        df_normalized[columns_to_normalize] = scaler.fit_transform(df[columns_to_normalize])
    # Save results
    output_file = os.path.join(output_dir, f"02_df_{method.capitalize()}.parquet")
    df_normalized.to_parquet(output_file, index=False)
    print(f"Dataset normalizado guardado en: {output_file}")


Dataset normalizado guardado en: c:\Users\Administrador\Documents\PythonScripts\Tesis\tesisaustral\intermediate\normalized\02_df_Maxabs.parquet
Dataset normalizado guardado en: c:\Users\Administrador\Documents\PythonScripts\Tesis\tesisaustral\intermediate\normalized\02_df_Minmax.parquet
Dataset normalizado guardado en: c:\Users\Administrador\Documents\PythonScripts\Tesis\tesisaustral\intermediate\normalized\02_df_Standard.parquet
Dataset normalizado guardado en: c:\Users\Administrador\Documents\PythonScripts\Tesis\tesisaustral\intermediate\normalized\02_df_Robust.parquet
Dataset normalizado guardado en: c:\Users\Administrador\Documents\PythonScripts\Tesis\tesisaustral\intermediate\normalized\02_df_None.parquet
