In [None]:
import logging


In [None]:
# Load the shared utility functions.
# NOTE(review): dbutils.notebook.run() appears to execute the target notebook as
# a separate run, so names it defines (presumably upload_csv, which later cells
# call) would not become available in this notebook's namespace. Including the
# notebook inline with `%run /FileStore/etl/utils` in its own cell looks like
# the intended mechanism — TODO confirm against the Databricks documentation.
# The second argument is the timeout in seconds; 0 presumably means "no timeout".
dbutils.notebook.run("/FileStore/etl/utils", 0)

In [None]:
# Configure logging at INFO level.
# basicConfig() does nothing when the root logger already has handlers attached
# (hosted notebook runtimes commonly pre-install some — verify for this cluster),
# so the level is also set explicitly on the root logger; otherwise the
# logging.info() progress messages would be filtered out at the default
# WARNING level.
logging.basicConfig(level=logging.INFO)
logging.getLogger().setLevel(logging.INFO)

In [None]:
# Source CSV locations, keyed by logical table name.
rutas = dict(
    transactions="FileStore/raw/credit_transactions.csv",
    accounts="FileStore/raw/customer_accounts.csv",
    demographics="FileStore/raw/customer_demographics.csv",
    merchant="FileStore/raw/merchant_categories.csv",
)

# Load each source file through upload_csv (defined outside this notebook;
# presumably provided by the utilities notebook) and index the results by
# the same table names.
df_dict = {
    table_name: upload_csv(csv_path)
    for table_name, csv_path in rutas.items()
}

In [None]:
# Primary-key column of each table, used by the null-value validation step.
primary_keys = dict(
    transactions="transaction_id",
    accounts="customer_id",
    demographics="customer_id",
    merchant="category",
)

In [None]:
# Validate every loaded DataFrame: abort when a load failed and drop the rows
# whose primary key is null.
# The DataFrames already held in df_dict (built by the loading cell) are reused
# here; reading every CSV a second time only doubled the I/O.
# Iterate over a snapshot of the items because entries are reassigned in the loop.
for key, df in list(df_dict.items()):
    # upload_csv presumably signals a failed load by returning None — verify
    # against the utilities notebook.
    if df is None:
        logging.error(f"Error en la carga del archivo {key}. Deteniendo el proceso.")
        # Raise instead of calling exit(1): `exit` is an interactive helper whose
        # behaviour depends on the host (in IPython-based kernels it does not
        # reliably halt the cell), whereas an exception always stops execution
        # and marks the run as failed.
        raise RuntimeError(f"Error en la carga del archivo {key}.")

    # Count rows whose primary-key column is null
    primary_key = primary_keys[key]
    null_count = df.filter(df[primary_key].isNull()).count()

    if null_count > 0:
        logging.warning(f"La tabla {key} contiene {null_count} valores nulos en la clave primaria {primary_key}.")
        # Keep only rows with a usable primary key
        df = df.filter(df[primary_key].isNotNull())

    df_dict[key] = df

In [None]:
# Persist each table to the Bronze layer as Parquet, overwriting any previous
# output at the same path.
try:
    for key, df in df_dict.items():
        # Defensive guard: skip entries that hold no DataFrame
        if df is not None:
            df.write.mode("overwrite").parquet(f"FileStore/bronze/{key}")
            logging.info(f"Tabla {key} guardada en Bronze correctamente.")
except Exception as e:
    logging.error(f"Error al escribir en Bronze: {e}")
    # Re-raise after logging: the first failure already stops the remaining
    # writes, so swallowing it would let the run finish "successfully" with
    # missing or partial Bronze output.
    raise