In [0]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window

def apply_cast(df, casts: dict):
    """Convert the listed DataFrame columns to the requested types.

    Args:
        df: Spark DataFrame whose columns are converted in place (same names).
        casts: Mapping of column name -> target type, e.g.
            ``{"id": "long", "name": "string", "day": "date", "at": "timestamp"}``.
            ``"ts"`` is accepted as shorthand for ``"timestamp"``. Any other
            value is passed straight to ``Column.cast``.

    Returns:
        The DataFrame with every listed column replaced by its converted value.
        An empty ``casts`` mapping returns ``df`` unchanged.
    """
    for col_name, target in casts.items():
        if target == "date":
            converted = F.to_date(F.col(col_name))
        elif target in ("timestamp", "ts"):
            # "ts" is the documented shorthand; without this branch it would
            # reach Column.cast("ts"), which is not a Spark type name.
            converted = F.to_timestamp(F.col(col_name))
        else:
            converted = F.col(col_name).cast(target)
        df = df.withColumn(col_name, converted)
    return df

def add_event_key(df, key_cols, existing_key_col=None, sep="||"):
    """Add a string ``event_key`` column to ``df``.

    The key is the SHA-256 hex digest of the ``key_cols`` values (cast to
    string and joined with ``sep``). When ``existing_key_col`` is given, rows
    whose value in that column is non-null and non-empty use that value (cast
    to string) instead of the hash.

    Args:
        df: Spark DataFrame to extend.
        key_cols: Column names whose values make up the hashed key.
        existing_key_col: Optional name of a column that already carries an
            event key; falsy means "always hash".
        sep: Separator placed between the key parts before hashing.

    Returns:
        ``df`` with an ``event_key`` column added (or replaced).
    """
    # NULL parts become "" so they still occupy a slot in the joined string;
    # concat_ws skips NULL inputs, which would otherwise shift the remaining
    # parts together.
    parts = [F.coalesce(F.col(c).cast("string"), F.lit("")) for c in key_cols]
    hashed_key = F.sha2(F.concat_ws(sep, *parts), 256)

    if not existing_key_col:
        return df.withColumn("event_key", hashed_key)

    existing = F.col(existing_key_col)
    has_existing = existing.isNotNull() & (existing != "")
    return df.withColumn(
        "event_key",
        F.when(has_existing, existing.cast("string")).otherwise(hashed_key),
    )

def dedupe_lateste(df, key_col="event_key", order_col="ROW_INSERT_DATE"):
    """Keep a single row per ``key_col`` value.

    Rows are numbered within each ``key_col`` partition by ``order_col``
    descending with NULLs last; only the row numbered 1 is kept. The helper
    column ``rn`` is added for the numbering and dropped before returning.
    """
    newest_first = F.col(order_col).desc_nulls_last()
    per_key = Window.partitionBy(key_col).orderBy(newest_first)
    numbered = df.withColumn("rn", F.row_number().over(per_key))
    first_only = numbered.filter("rn=1")
    return first_only.drop("rn")

def ensure_table_schema(df, silver_tbl):
    """Create ``silver_tbl`` as an empty Delta table if it does not exist.

    The table is written from a zero-row slice of ``df``. Nothing is done
    when the catalog already reports the table as existing.
    Relies on a ``spark`` session being available as a global.
    """
    if spark.catalog.tableExists(silver_tbl):
        return
    empty_frame = df.limit(0)
    writer = empty_frame.write.format("delta").mode("overwrite")
    writer.saveAsTable(silver_tbl)

def get_max_dt(silver_tbl, order_col):
    """Return ``max(order_col)`` from ``silver_tbl``, or None if the table is absent.

    Relies on a ``spark`` session being available as a global. Both arguments
    are interpolated directly into the SQL text.
    """
    if spark.catalog.tableExists(silver_tbl):
        query = f"SELECT max({order_col}) AS max_dt FROM {silver_tbl}"
        first_row = spark.sql(query).collect()[0]
        return first_row["max_dt"]
    return None

def apply_lookback(df, max_dt, order_col, lookback_days=2):
    """Restrict ``df`` to rows at or after ``max_dt`` minus ``lookback_days``.

    When ``max_dt`` is None, ``df`` is returned unfiltered.
    """
    if max_dt is not None:
        cutoff = F.date_sub(F.lit(max_dt), lookback_days)
        is_recent = F.col(order_col) >= cutoff
        return df.filter(is_recent)
    return df

def merge_delta(silver_tbl, staging_df, key_col="event_key", order_col="ROW_INSERT_DATE"):
    """Upsert ``staging_df`` into ``silver_tbl`` with a SQL ``MERGE``.

    ``staging_df`` is registered as the temp view ``stg_upsert`` and merged on
    ``key_col``. A matched target row is overwritten (``UPDATE SET *``) only
    when the staging row's ``order_col`` is strictly greater than the target's,
    with NULLs on either side treated as 1900-01-01. Unmatched staging rows
    are inserted (``INSERT *``).

    Args:
        silver_tbl: Name of the target table.
        staging_df: DataFrame holding the rows to upsert.
        key_col: Column used to match staging rows to target rows.
        order_col: Column compared to decide whether a match is updated.

    Relies on a ``spark`` session being available as a global.
    """
    staging_df.createOrReplaceTempView("stg_upsert")
    # NOTE: silver_tbl, key_col and order_col are interpolated directly into
    # the SQL text, so they must come from trusted configuration.
    spark.sql(f"""
    MERGE INTO {silver_tbl} t
    USING stg_upsert s
    ON s.{key_col}=t.{key_col}
    WHEN MATCHED AND coalesce(s.{order_col},to_date('1900-01-01'))>coalesce(t.{order_col},to_date('1900-01-01'))
    THEN UPDATE SET *
    WHEN NOT MATCHED THEN INSERT *
    """)



    