In [None]:
from datetime import datetime

import pyspark.sql.functions as F

from vars import TABLE_CONFIGS
from commons import (
    init_watermark_table,
    get_watermark,
    update_watermark,
    extract_incremental_from_postgres,
    write_landing,
)


def run_landing_for_table(table_name: str) -> None:
    """Run one incremental landing load for ``table_name``.

    Looks up the table's configuration and last watermark, obtains the
    incremental DataFrame from ``extract_incremental_from_postgres``, hands
    it to ``write_landing`` under a timestamp-based batch id, and finally
    stores the largest ``data_ref`` of the batch through
    ``update_watermark``. When the extraction yields no rows, nothing is
    written and ``update_watermark`` is called with the unchanged watermark.

    Args:
        table_name: Key of the table in ``TABLE_CONFIGS``.

    Raises:
        ValueError: If ``table_name`` is not a key of ``TABLE_CONFIGS``.
    """
    if table_name not in TABLE_CONFIGS:
        raise ValueError(f"Tabela '{table_name}' não está configurada.")

    conf = TABLE_CONFIGS[table_name]
    last_wm = get_watermark(table_name)
    df_src = extract_incremental_from_postgres(table_name, conf, last_wm)

    # Spark DataFrames are lazy, so every action on ``df_src`` may re-run the
    # extraction against the source. If the write and the max(data_ref) query
    # saw different snapshots, the watermark could move past rows that were
    # never landed. Persisting makes the write and the statistics read the
    # same materialised data.
    # NOTE(review): a lost executor can still trigger recomputation of a
    # cached partition; persist narrows the window, it is not a transaction.
    df_src.persist()
    try:
        # One aggregation both materialises the persisted data and returns
        # the row count and the watermark candidate, replacing the separate
        # ``rdd.isEmpty()`` and ``max`` jobs.
        stats = df_src.agg(
            F.count(F.lit(1)).alias("row_count"),
            F.max("data_ref").alias("max_dr"),
        ).collect()[0]

        if stats["row_count"] == 0:
            # Nothing new: re-write the current watermark, as before.
            update_watermark(table_name, last_wm)
            return

        batch_id = datetime.now().strftime("%Y%m%d%H%M%S")
        write_landing(df_src, table_name, batch_id)

        new_max_data_ref = stats["max_dr"]
        if new_max_data_ref is None:
            # max() is NULL when every data_ref in the batch is NULL; keep
            # the previous watermark instead of overwriting it with None.
            new_max_data_ref = last_wm
        update_watermark(table_name, new_max_data_ref)
    finally:
        # Always release the cached data, including on early return or error.
        df_src.unpersist()


# Notebook entry point: call init_watermark_table() once, then run the landing
# load for every key of TABLE_CONFIGS in its iteration order. An exception
# raised for one table propagates and stops the remaining tables.
init_watermark_table()
for tbl in TABLE_CONFIGS:
    run_landing_for_table(tbl)
