In [0]:
from pyspark.sql.functions import col

In [0]:
# Remove every widget currently defined on this notebook so the widget created
# below starts from a clean state.
# NOTE(review): Databricks documents that a widget cannot be created in the same
# cell that calls removeAll(); keep this call in its own cell.
dbutils.widgets.removeAll()

In [0]:
# Text widget "target_table_name" with default value "ALL". Later in this
# notebook the value "ALL" (compared case-insensitively) selects every table in
# the mapping; any other value is treated as a single target table name.
dbutils.widgets.text("target_table_name", "ALL")

In [0]:
# Read the requested target table name from the notebook widget. Surrounding
# whitespace is stripped so that a value such as " ALL " or "my_table " still
# matches the "ALL" check and the exact-match filter on the mapping table.
target_input = dbutils.widgets.get("target_table_name").strip()

In [0]:
def load_curated_table(target_table_name, mapping_df):
    """Load one curated target table from its source table using mapping rows.

    The rows of ``mapping_df`` whose ``target_table_name`` equals the given name
    are read in ``taregt_field_order`` order. Each row supplies one column:
    ``source_field_name`` is selected from the source table, cast to
    ``target_data_type`` and named ``target_field_name``. The source table
    (``source_schema.source_table_name``) and the target schema are taken from
    the first mapping row. The target table is created if it does not exist and
    the projected rows are appended to it.

    Args:
        target_table_name: Name of the target table to load; matched exactly
            against the ``target_table_name`` column of ``mapping_df``.
        mapping_df: Spark DataFrame holding the field-level mapping rows.

    Raises:
        RuntimeError: If anything fails, including the case where no mapping
            rows exist for ``target_table_name``. The original exception is
            attached as the cause.
    """
    try:
        # Materialise the mapping exactly once. Collecting several times would
        # re-run the query each time and, when the order column has ties, could
        # return rows in different orders and misalign the field lists.
        # NOTE(review): "taregt_field_order" is kept as spelled; presumably it
        # matches the actual column name in the mapping table -- confirm.
        mapping_rows = (
            mapping_df
            .filter(col("target_table_name") == target_table_name)
            .orderBy("taregt_field_order")
            .collect()
        )
        if not mapping_rows:
            raise ValueError(
                f"no mapping rows found for target table '{target_table_name}'"
            )

        # Table-level attributes are read from the first row in field order.
        first_row = mapping_rows[0]
        source_schema = first_row.source_schema
        source_table_name = first_row.source_table_name
        target_schema = first_row.target_schema

        source_fields = [row.source_field_name for row in mapping_rows]
        target_fields = [row.target_field_name for row in mapping_rows]
        target_types = [row.target_data_type for row in mapping_rows]

        # Build the whole projection in a single select. Renaming columns one at
        # a time breaks when a target name equals another column's source name
        # (duplicate/ambiguous column) and does not rename dotted source fields.
        projected_columns = [
            col(src_col).cast(tgt_type).alias(tgt_col)
            for src_col, tgt_col, tgt_type in zip(source_fields, target_fields, target_types)
        ]
        source_df = (
            spark.table(f"{source_schema}.{source_table_name}")
            .select(*projected_columns)
        )

        target_full_name = f"{target_schema}.{target_table_name}"
        column_ddl = ", ".join(f"{c} {t}" for c, t in zip(target_fields, target_types))
        spark.sql(f"CREATE TABLE IF NOT EXISTS {target_full_name} ({column_ddl})")

        # insertInto resolves columns by position, not by name, so the select
        # order above must match the column order of the target table.
        source_df.write.mode("append").insertInto(target_full_name)
    except Exception as e:
        raise RuntimeError(f"Failed to load table {target_table_name}: {str(e)}") from e

In [0]:
# Rows of workspace.control.curated_table_mapping whose is_active flag is true.
mapping_df = spark.table("workspace.control.curated_table_mapping").filter(
    col("is_active") == True
)

if target_input.upper() != "ALL":
    # A specific table was requested: load only that one.
    table_list = [target_input]
else:
    # "ALL" in any casing: every distinct target table in the active mapping,
    # ordered by its sequence column.
    table_seq_df = mapping_df.select(
        "curated_table_id", "target_table_name", "tareget_table_sequency"
    ).distinct().orderBy("tareget_table_sequency")
    table_list = [row["target_table_name"] for row in table_seq_df.collect()]

In [0]:
# Show the resolved list of target tables before any load is started.
display(table_list)

In [0]:
# Load the tables one at a time, in list order. load_curated_table raises
# RuntimeError on failure and nothing catches it here, so the first failing
# table stops the loop and the remaining tables are not loaded.
for tgt_table in table_list:
    load_curated_table(tgt_table, mapping_df)