## Audit Configuration

In [0]:
import dlt
import json
from pyspark.sql.functions import *
from pyspark.sql.types import *
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from pyspark.sql.window import Window
import uuid
from delta.tables import DeltaTable

In [0]:
# audit_mnt = "/mnt/ingest00curatedsboxsilver/ARIADM/ARM/AUDIT/TD"
# Storage path of the audit Delta table. It is checked (and created when missing)
# before the audit runs, and is the target of the final append.
audit_delta_path = "/mnt/ingest00curatedsboxsilver/ARIADM/ARM/AUDIT/APPEALS/ARIAFTA/td_cr_audit_table"

In [0]:


def datetime_uuid(now=None):
    """
    Build a run identifier from a UTC timestamp.

    The identifier is a name-based (version 5) UUID in the DNS namespace,
    computed from the timestamp formatted as ``YYYY-MM-DD HH:MM:SS``.
    Because the name has one-second resolution and UUID5 is deterministic,
    two calls that fall in the same UTC second return the same identifier.

    :param now: Optional ``datetime`` to derive the identifier from. A naive
        value is used as-is (treated as already being UTC); an aware value is
        converted to UTC first. Defaults to the current UTC time.
    :return: The UUID as a string.
    """
    # Local import so the block does not rely on a file-level import of ``timezone``.
    from datetime import timezone

    if now is None:
        # Aware replacement for datetime.utcnow(), which is deprecated as of
        # Python 3.12; the formatted string is the same.
        now = datetime.now(timezone.utc)
    elif now.tzinfo is not None:
        now = now.astimezone(timezone.utc)

    dt_str = now.strftime("%Y-%m-%d %H:%M:%S")
    return str(uuid.uuid5(uuid.NAMESPACE_DNS, dt_str))

# One identifier per notebook execution; create_audit_df and the failure rows
# stamp it on every audit record as "Runid".
run_id_value = datetime_uuid()

# Column layout of the audit Delta table. Every field is nullable.
audit_schema = (
    StructType()
    .add("Runid", StringType(), True)
    .add("Unique_identifier_desc", StringType(), True)
    .add("Unique_identifier", StringType(), True)
    .add("Table_name", StringType(), True)
    .add("Stage_name", StringType(), True)
    .add("Record_count", IntegerType(), True)
    .add("Run_dt", TimestampType(), True)
    .add("Batch_id", StringType(), True)
    .add("Description", StringType(), True)
    .add("File_name", StringType(), True)
    .add("Status", StringType(), True)
)

def create_audit_df(df: DataFrame, unique_identifier_desc: str, table_name: str, stage_name: str, description: str, additional_columns: list = None) -> DataFrame:
    """
    Summarise a table into audit rows, one per distinct identifier value.

    The identifier column is selected (aliased to ``unique_identifier``) together
    with any additional columns, constant audit attributes are attached, and the
    rows are grouped by every resulting column so that ``Record_count`` holds the
    number of input rows for each distinct combination. Nothing is written
    here; the caller decides what to do with the returned DataFrame.

    :param df: Input DataFrame to audit.
    :param unique_identifier_desc: Name of the column in ``df`` that holds the identifier;
        also stored verbatim in the ``Unique_identifier_desc`` column.
    :param table_name: Value stored in the ``Table_name`` column.
    :param stage_name: Value stored in the ``Stage_name`` column.
    :param description: Value stored in the ``Description`` column.
    :param additional_columns: Optional names of extra columns of ``df`` to carry into
        the audit rows (and therefore into the grouping). ``None`` entries are ignored.
    :return: DataFrame with the selected columns, the audit attributes and ``Record_count``.
    """
    # A missing list means "no extras"; None placeholders inside the list are skipped.
    extra_cols = [col(name) for name in (additional_columns or []) if name is not None]

    run_timestamp = datetime.utcnow()

    # Constant attributes, listed in the order they are appended to the frame.
    # run_id_value is the module-level id shared by the whole notebook run.
    audit_attributes = [
        ("Runid", lit(run_id_value)),
        ("Unique_identifier_desc", lit(unique_identifier_desc)),
        ("Stage_name", lit(stage_name)),
        ("Table_name", lit(table_name)),
        ("Run_dt", lit(run_timestamp).cast(TimestampType())),
        ("Description", lit(description)),
    ]

    audit_df = df.select(col(unique_identifier_desc).alias("unique_identifier"), *extra_cols)
    for attribute_name, attribute_value in audit_attributes:
        audit_df = audit_df.withColumn(attribute_name, attribute_value)

    # Grouping by every column collapses duplicates and counts them.
    row_count = count("*").cast(IntegerType()).alias("Record_count")
    return audit_df.groupBy(*audit_df.columns).agg(row_count)

In [0]:
# Bootstrap the audit Delta table: create it empty (with audit_schema) the first
# time the notebook runs, otherwise leave the existing table untouched.
if DeltaTable.isDeltaTable(spark, audit_delta_path):
    print(f"⚡ Delta table '{audit_delta_path}' already exists.")
else:
    print(f"🛑 Delta table '{audit_delta_path}' does not exist. Creating an empty Delta table...")

    # Writing a zero-row DataFrame is enough to materialise the table and its schema.
    empty_df = spark.createDataFrame([], audit_schema)
    empty_df.write.format("delta").mode("overwrite").save(audit_delta_path)

    print("✅ Empty Delta table successfully created in Azure Storage.")

In [0]:
# Audit configuration for the bronze tables.
# Every entry shares the same identifier column and stage, so only the table name
# and its description are listed; the comprehension expands each pair into the
# dict shape the audit loop expects (a fresh "unique_identifier_cols" list per entry).
# Descriptions are written to the audit table, so each one must describe its own table.
audit_params_bronze = [
    {
        "unique_identifier_cols": ["CaseNo"],
        "table_name": bronze_table_name,
        "stage_name": "bronze_stage",
        "description": bronze_description,
    }
    for bronze_table_name, bronze_description in [
        (
            "bronze_appealcase_cr_cs_ca_fl_cres_mr_res_lang",
            "The bronze_appealcase_cr_cs_ca_fl_cres_mr_res_lang Delta Live Table combining Appeal Case data with Case Respondent, Main Respondent, Respondent, File Location, Case Representative, Representative, and Language..",
        ),
        (
            "bronze_appealcase_ca_apt_country_detc",
            "The bronze_appealcase_ca_apt_country_detc Delta Live Table combining Case Appellant data with Appellant, Detention Centre, and Country information..",
        ),
        (
            "bronze_appealcase_cl_ht_list_lt_hc_c_ls_adj",
            "The bronze_appealcase_cl_ht_list_lt_hc_c_ls_adj Delta Live Table combining Status, Case List, Hearing Type, Adjudicator, Court, and other related details.",
        ),
        (
            "bronze_appealcase_bfdiary_bftype",
            "The bronze_appealcase_bfdiary_bftype Delta Live Table combining BFDiary and BFType details..",
        ),
        (
            "bronze_appealcase_history_users",
            "The bronze_appealcase_history_users Delta Live Table combining History and Users details",
        ),
        (
            # NOTE(review): wording inferred from the table name - confirm against the DLT definition.
            "bronze_appealcase_link_linkdetail",
            "The bronze_appealcase_link_linkdetail Delta Live Table combining Link and LinkDetail details.",
        ),
        (
            "bronze_appealcase_status_sc_ra_cs",
            "The bronze_appealcase_status_sc_ra_cs Delta Live Table joining Status, CaseStatus, StatusContact, ReasonAdjourn, Language, and DecisionType details.",
        ),
        (
            "bronze_appealcase_appealcatagory_catagory",
            "The bronze_appealcase_appealcatagory_catagory Delta Live Table for joining AppealCategory and Category tables to retrieve case and category details.",
        ),
        (
            # NOTE(review): the joined source tables are not visible here; wording follows
            # the silver_case_detail description - confirm against the DLT definition.
            "bronze_appealcase_p_e_cfs_prr_fs_cs_hc_ag_at",
            "The bronze_appealcase_p_e_cfs_prr_fs_cs_hc_ag_at Delta Live Table providing the Appeal Case details that silver_case_detail is derived from.",
        ),
        (
            "bronze_status_decisiontype",
            "The bronze_status_decisiontype Delta Live Table for joining Status and DecisionType tables to retrieve case and decision type details.",
        ),
        (
            "bronze_appealcase_t_tt_ts_tm",
            "The bronze_appealcase_t_tt_ts_tm Delta Live Table for joining Transaction, TransactionType, TransactionStatus, and TransactionMethod tables to retrieve transaction details.",
        ),
        (
            "bronze_appealcase_ahr_hr",
            "The bronze_appealcase_ahr_hr Delta Live Table for joining AppealHumanRight and HumanRight tables to retrieve case and human rights details.",
        ),
        (
            "bronze_appealcase_anm_nm",
            "The bronze_appealcase_anm_nm Delta  Live Table for joining AppealNewMatter and NewMatter tables to retrieve appeal and new matter details.",
        ),
        (
            "bronze_appealcase_dr_rd",
            "The bronze_appealcase_dr_rd Delta Live Table for joining DocumentsReceived and ReceivedDocument tables to retrieve document details.",
        ),
        (
            "bronze_appealcase_rsd_sd",
            "The bronze_appealcase_rsd_sd Delta Live Table for joining ReviewStandardDirection and StandardDirection tables to retrieve review standard direction details.",
        ),
        (
            "bronze_review_specific_direction",
            "The bronze_review_specific_direction Delta Live Table for retrieving details from the ReviewSpecificDirection table.",
        ),
        (
            "bronze_cost_award",
            "The bronze_cost_award Delta Live Table for retrieving details from the CostAward table.",
        ),
        (
            "bronze_cost_award_linked",
            "The bronze_cost_award_linked Delta Live Table for retrieving details from the CostAward_linked table.",
        ),
        (
            "bronze_costorder",
            "The bronze_costorder Delta Live Table for retrieving details from the CostOrder table.",
        ),
        (
            "bronze_hearing_points_change_reason",
            "The bronze_hearing_points_change_reason Delta Live Table for retrieving details from the HearingPointsChangeReason table",
        ),
        (
            "bronze_appeal_type_category",
            "The bronze_appeal_type_category Delta Live Table for retrieving details from the AppealTypeCategory table.",
        ),
        (
            "bronze_appeal_grounds",
            "The bronze_appeal_grounds Delta Live Table for retrieving Appeal Grounds with Appeal Type descriptions.",
        ),
        (
            # NOTE(review): wording follows the matching silver description - confirm.
            "bronze_required_incompatible_adjudicator",
            "The bronze_required_incompatible_adjudicator Delta Live Table for retrieving required incompatible adjudicator details.",
        ),
        (
            # NOTE(review): wording follows the matching silver description - confirm.
            "bronze_case_adjudicator",
            "The bronze_case_adjudicator Delta Live Table for retrieving case adjudicator details.",
        ),
    ]
]

In [0]:
# Audit configuration for the segmentation staging tables.
# All entries share the identifier column and stage name, so only the
# (table name, description) pairs are spelled out and expanded into dicts.
audit_params_segmentation = [
    {
        "unique_identifier_cols": ["CaseNo"],
        "table_name": segmentation_table_name,
        "stage_name": "segmentation_stage",
        "description": segmentation_description,
    }
    for segmentation_table_name, segmentation_description in (
        (
            "stg_appealcasestatus_filtered",
            "This is the stg_appealcasestatus_filtered table segmentation logic to identify distinct Appeals categories so in later stages we can identify the distinct appeal categories.",
        ),
        (
            "stg_firsttier_filtered",
            "This is the stg_firsttier_filtered table to Identify cases 'FT Retained - ARM catagory' and mark is as ARIAFTA(FirstTierApplication)",
        ),
        (
            "stg_skeleton_filtered",
            "This is the  skeleton cases using the segmentation logic-and mark is as ARIAFTA(FirstTierApplication)",
        ),
        (
            "stg_uppertribunalretained_filtered",
            "This is the stg_uppertribunalretained_filtered table to Identify cases 'UT Retained' catagory and mark is as ARIAUTA(UpperTribinalApplication)",
        ),
        (
            "stg_firsttieroverdue_filtered",
            "This is the stg_firsttieroverdue_filtered table to Identify cases 'FT Overdue' - ARM catagory and mark is as ARIAFTA(FirstTierApplication)",
        ),
        (
            "stg_uppertribunaloverdue_filtered",
            "This is the stg_uppertribunaloverdue_filtered table to Identify cases 'UT Overdue' - ARM catagory and mark is as ARIAUTA(UpperTribinalApplication)",
        ),
        (
            "stg_filepreservedcases_filtered",
            "This is the stg_filepreservedcases_filtered table to Identify cases with CaseType = 1 and DeptID == 520 and make is as 'ARIAFilePreservedCases' - ARM catagory and mark is as ARIAUTA(UpperTribinalApplication)",
        ),
        (
            "stg_appeals_filtered",
            "This is the stg_appeals_filtered table to Identify cases  ARIAFTA(FirstTierApplication)",
        ),
    )
]


In [0]:
# Audit configuration for the silver tables.
# Every entry shares the identifier column and stage, so only the table name and
# description are listed and expanded into the dict shape the audit loop expects.
# Each description names the bronze table the silver table is derived from; the
# link, status and appeal-category entries follow the same ordering as the bronze list.
audit_params_silver = [
    {
        "unique_identifier_cols": ["CaseNo"],
        "table_name": silver_table_name,
        "stage_name": "silver_stage",
        "description": silver_description,
    }
    for silver_table_name, silver_description in [
        (
            "silver_appealcase_detail",
            "The silver_appealcase_detail table contains Appeal Case details derived from bronze_appealcase_cr_cs_ca_fl_cres_mr_res_lang, filtered by Appeal Category.",
        ),
        (
            "silver_applicant_detail",
            "The silver_applicant_detail table contains case applicant details derived from bronze_appealcase_ca_apt_country_detc, filtered by Appeal Category and where CaseAppellantRelationship is null, excluding dependents.",
        ),
        (
            "silver_dependent_detail",
            "The silver_dependent_detail table contains case applicant dependents details derived from bronze_appealcase_ca_apt_country_detc, filtered by Appeal Category and where CaseAppellantRelationship is not null, excluding applicants.",
        ),
        (
            "silver_list_detail",
            "The silver_list_detail table contains list details derived from bronze_appealcase_cl_ht_list_lt_hc_c_ls_adj, filtered by Appeal Category.",
        ),
        (
            "silver_dfdairy_detail",
            "The silver_dfdairy_detail table contains BFDiary details derived from bronze_appealcase_bfdiary_bftype, filtered by Appeal Category.",
        ),
        (
            "silver_history_detail",
            "The silver_history_detail table contains history details derived from bronze_appealcase_history_users, filtered by Appeal Category.",
        ),
        (
            "silver_link_detail",
            "The silver_link_detail table contains link details derived from bronze_appealcase_link_linkdetail, filtered by Appeal Category.",
        ),
        (
            "silver_status_detail",
            "The silver_status_detail table contains status details derived from bronze_appealcase_status_sc_ra_cs, filtered by Appeal Category.",
        ),
        (
            "silver_appealcategory_detail",
            "The silver_appealcategory_detail table contains appeal category details derived from bronze_appealcase_appealcatagory_catagory, filtered by Appeal Category.",
        ),
        (
            "silver_case_detail",
            "The silver_case_detail table contains case details derived from bronze_appealcase_p_e_cfs_prr_fs_cs_hc_ag_at, filtered by Appeal Category.",
        ),
        (
            "silver_statusdecisiontype_detail",
            "The silver_statusdecisiontype_detail table contains status decision type details derived from bronze_status_decisiontype, filtered by Appeal Category.",
        ),
        (
            "silver_transaction_detail",
            "The silver_transaction_detail table contains transaction details derived from bronze_appealcase_t_tt_ts_tm, filtered by Appeal Category.",
        ),
        (
            "silver_humanright_detail",
            "The silver_humanright_detail table contains human rights details derived from bronze_appealcase_ahr_hr, filtered by Appeal Category.",
        ),
        (
            "silver_newmatter_detail",
            "The silver_newmatter_detail table contains new matter details derived from bronze_appealcase_anm_nm, filtered by Appeal Category.",
        ),
        (
            "silver_documents_detail",
            "The silver_documents_detail table contains document details derived from bronze_appealcase_dr_rd, filtered by Appeal Category.",
        ),
        (
            "silver_direction_detail",
            "The silver_direction_detail table contains direction details derived from bronze_appealcase_rsd_sd, filtered by Appeal Category.",
        ),
        (
            "silver_reviewspecificdirection_detail",
            "The silver_reviewspecificdirection_detail table contains review-specific direction details derived from bronze_review_specific_direction, filtered by Appeal Category.",
        ),
        (
            "silver_linkedcostaward_detail",
            "The silver_linkedcostaward_detail table contains linked cost award details derived from bronze_cost_award_linked, filtered by Appeal Category.",
        ),
        (
            "silver_costaward_detail",
            "The silver_costaward_detail table contains cost award details derived from bronze_cost_award, filtered by Appeal Category.",
        ),
        (
            "silver_costorder_detail",
            "The silver_costorder_detail table contains cost order details derived from bronze_costorder, filtered by Appeal Category.",
        ),
        (
            "silver_hearingpointschange_detail",
            "The silver_hearingpointschange_detail table contains hearing points change reason details derived from bronze_hearing_points_change_reason, filtered by Appeal Category.",
        ),
        (
            "silver_hearing_points_history_detail",
            "The silver_hearing_points_history_detail table contains hearing points history details derived from bronze_hearing_points_history, filtered by Appeal Category.",
        ),
        (
            "silver_appealtypecategory_detail",
            "The silver_appealtypecategory_detail table contains appeal type category details derived from bronze_appeal_type_category, filtered by Appeal Category.",
        ),
        (
            "silver_appeal_grounds_detail",
            "The silver_appeal_grounds_detail table contains appeal grounds details derived from bronze_appeal_grounds, filtered by Appeal Category.",
        ),
        (
            "silver_required_incompatible_adjudicator",
            "The silver_required_incompatible_adjudicator table contains required incompatible adjudicator details derived from bronze_required_incompatible_adjudicator, filtered by Appeal Category.",
        ),
        (
            "silver_case_adjudicator",
            "The silver_case_adjudicator table contains adjudicator details derived from bronze_case_adjudicator, filtered by Appeal Category.",
        ),
        (
            # NOTE(review): the stated source (bronze_case_adjudicator) looks copied from
            # the entry above; left unchanged because the real source is not visible here.
            "silver_archive_metadata",
            "The silver_archive_metadata table contains archive metadata derived from bronze_case_adjudicator, filtered by Appeal Category.",
        ),
    ]
]

In [0]:
# Audit configuration for the gold-phase staging and output tables.
# "Extra_columns_mapping" maps an audit column (File_name / Status) to the source
# column that supplies it; the special source value "NotYetBatched" is turned into
# a literal by the audit loop instead of being read from the table.
# NOTE(review): these entries were previously tagged "segmentation_stage", the same
# value used by the segmentation list; they are tagged "gold_stage" here to match
# the bronze/silver convention - confirm that downstream audit reports expect this.
audit_params_gold = [
    {
        "unique_identifier_cols": ["CaseNo"],
        "table_name": "stg_statichtml_data",
        "stage_name": "gold_stage",
        "description": "This is the stg_statichtml_data staging table is used to derive static HTML with a one-to-one column mapping. that are applicable for gold outputs"
    },
    {
        "unique_identifier_cols": ["CaseNo"],
        "table_name": "stg_statusdetail_data",
        "stage_name": "gold_stage",
        "description": "This is the stg_statusdetail_data staging table derives HTML data for status details in nested tabs in HTML output."
    },
    {
        "unique_identifier_cols": ["CaseNo"],
        "table_name": "stg_apl_combined",
        "stage_name": "gold_stage",
        "description": "This is the stg_apl_combined unified stage created all consolidated data"
    },
    {
        "unique_identifier_cols": ["CaseNo"],
        "table_name": "stg_apl_create_json_content",
        "stage_name": "gold_stage",
        "description": "This is the stg_apl_create_json_content staging table derives JSON data content.",
        "Extra_columns_mapping": {"File_name": "JSONFileName", "Status": "JSONStatus"}
    },
    {
        "unique_identifier_cols": ["CaseNo"],
        "table_name": "stg_apl_create_html_content",
        "stage_name": "gold_stage",
        "description": "This is the stg_apl_create_html_content staging table derives HTML data content.",
        "Extra_columns_mapping": {"File_name": "HTMLFileName", "Status": "HTMLStatus"}
    },
    {
        "unique_identifier_cols": ["CaseNo"],
        "table_name": "stg_apl_create_a360_content",
        "stage_name": "gold_stage",
        "description": "This is the stg_apl_create_a360_content staging table derives A360 data content.",
        # No file name exists at this point, so the sentinel "NotYetBatched" is recorded instead.
        "Extra_columns_mapping": {"File_name": "NotYetBatched", "Status": "A360Status"}
    },
    {
        "unique_identifier_cols": ["CaseNo"],
        "table_name": "stg_appeals_unified",
        "stage_name": "gold_stage",
        "description": "This is the stg_appeals_unified  table consolidates all silver data, including HTML, JSON, and A360 content, along with its status.",
        "Extra_columns_mapping": {"File_name": "A360FileName", "Status": "A360Status"}
    },
    {
        "unique_identifier_cols": ["CaseNo"],
        "table_name": "gold_appeals_with_json",
        "stage_name": "gold_stage",
        "description": "This is the gold_appeals_with_json  table to upload JSON gold outputs.",
        "Extra_columns_mapping": {"File_name": "JSONFileName", "Status": "UploadStatus"}
    },
    {
        "unique_identifier_cols": ["CaseNo"],
        "table_name": "gold_appeals_with_html",
        "stage_name": "gold_stage",
        "description": "This is the gold_appeals_with_HTML  table to upload HTML gold outputs.",
        "Extra_columns_mapping": {"File_name": "HTMLFileName", "Status": "UploadStatus"}
    },
    {
        "unique_identifier_cols": ["CaseNo"],
        "table_name": "gold_appeals_with_a360",
        "stage_name": "gold_stage",
        "description": "This is the gold_appeals_with_a360  table to upload A360 gold outputs.",
        "Extra_columns_mapping": {"File_name": "A360FileName", "Status": "UploadStatus"}
    }
]

In [0]:
# Full audit work list, in pipeline order: bronze, segmentation, silver, gold.
audit_params = [
    *audit_params_bronze,
    *audit_params_segmentation,
    *audit_params_silver,
    *audit_params_gold,
]
audit_params

In [0]:
# Build one audit DataFrame per configured table and union them into df_final_audit.
#
# For each entry of audit_params the table is read from hive_metastore.ariadm_arm_fta,
# the identifier column is prepared, any "Extra_columns_mapping" columns are added and
# carried into the audit via create_audit_df(additional_columns=...). A table that
# cannot be audited does not stop the run: it contributes a single failure row whose
# Status says what went wrong.
audit_dataframes = []

for params in audit_params:
    table_name = params["table_name"]
    stage_name = params["stage_name"]
    unique_identifier_cols = params["unique_identifier_cols"]
    description = params["description"]
    extra_columns_mapping = params.get("Extra_columns_mapping", {})
    unique_identifier_desc = "_".join(unique_identifier_cols)

    # Stays None when the table is audited successfully; otherwise holds the
    # text recorded in the failure row's Status column.
    status = None

    try:
        df_logging = spark.read.table(f"hive_metastore.ariadm_arm_fta.{table_name}")
    except Exception as read_error:
        # NOTE(review): any failure to open the table is reported with the original
        # "does not exist" wording; the underlying error is printed for diagnosis.
        print(f"Audit skipped for {table_name}: could not read table ({type(read_error).__name__}: {read_error})")
        status = f"Failed - Table {table_name} does not exist"
    else:
        try:
            df_audit = df_logging

            # A composite identifier is materialised as a single "_"-joined string
            # column; a single identifier column is used as it is.
            if len(unique_identifier_cols) > 1:
                df_audit = df_audit.withColumn(
                    unique_identifier_desc,
                    concat_ws("_", *[col(c).cast("string") for c in unique_identifier_cols])
                )

            # Add the mapped audit columns (File_name / Status). They are cast to
            # string because audit_schema declares both as StringType.
            for new_col, source_col in extra_columns_mapping.items():
                if source_col == "NotYetBatched":
                    df_audit = df_audit.withColumn(new_col, lit("NotYetBatched"))
                else:
                    df_audit = df_audit.withColumn(new_col, col(source_col).cast("string"))

            # Pass the mapped columns on explicitly: create_audit_df only keeps the
            # identifier plus the columns named in additional_columns.
            df_audit_appended = create_audit_df(
                df_audit,
                unique_identifier_desc=unique_identifier_desc,
                table_name=table_name,
                stage_name=stage_name,
                description=description,
                additional_columns=list(extra_columns_mapping)
            )

            audit_dataframes.append(df_audit_appended)

        except Exception as audit_error:
            # The table exists but could not be audited (for example a mapped source
            # column is missing). Record the real reason, trimmed to its first line.
            reason = (str(audit_error).splitlines() or [""])[0][:500]
            print(f"Audit failed for {table_name}: {type(audit_error).__name__}: {audit_error}")
            status = f"Failed - {type(audit_error).__name__}: {reason}"

    if status is not None:
        row_data = {
            "Runid": run_id_value,
            "Unique_identifier_desc": unique_identifier_desc,
            "Unique_identifier": None,
            "Table_name": table_name,
            "Stage_name": stage_name,
            "Record_count": 0,
            "Run_dt": datetime.now(),
            "Batch_id": None,
            "Description": description,
            "File_name": None,
            "Status": status
        }

        row_df = spark.createDataFrame([row_data], schema=audit_schema)
        audit_dataframes.append(row_df)

# Union by column name; columns missing from a frame (e.g. File_name / Status for
# tables without a mapping) are filled with nulls.
df_final_audit = audit_dataframes[0]
for df in audit_dataframes[1:]:
    df_final_audit = df_final_audit.unionByName(df, allowMissingColumns=True)



In [0]:
# Preview the combined audit rows before they are appended to the audit table.
display(df_final_audit)

In [0]:
# Append this run's audit rows to the audit Delta table at audit_delta_path.
# The mergeSchema option is enabled so the append may evolve the table schema.
df_final_audit.write.format("delta").mode("append").option("mergeSchema", "true").save(audit_delta_path)

In [0]:
# Hand a completion message back to whatever invoked this notebook.
# NOTE(review): per Databricks docs this ends the run here, so the Appendix cells are not executed - confirm.
dbutils.notebook.exit("Notebook completed successfully")

## Appendix

In [0]:
# df_final_audit.createOrReplaceTempView("tv_final_audit")



In [0]:
# audit_tables = [param["table_name"] for param in audit_params]
# display(len(audit_tables))
# print(audit_tables)

In [0]:

# distinct_table_names = spark.sql("""select array_distinct(collect_list(table_name)) as distinct_table_names from tv_final_audit""").first()["distinct_table_names"]

# display(len(distinct_table_names))
# print(distinct_table_names)


In [0]:
# diff_tables = list(set(audit_tables) - set(distinct_table_names))
# diff_tables

In [0]:
# %sql
# select table_name,runid, count(*) from tv_final_audit
# --  where table_name like 'stg%'
# group by all

In [0]:
# stg_firsttier_filtered = spark.table("hive_metastore.ariadm_arm_fta.stg_firsttier_filtered")
# stg_skeleton_filtered = spark.table("hive_metastore.ariadm_arm_fta.stg_skeleton_filtered")
# stg_firsttieroverdue_filtered = spark.table("hive_metastore.ariadm_arm_fta.stg_firsttieroverdue_filtered")

# union_df = stg_firsttier_filtered.union(stg_skeleton_filtered).union(stg_firsttieroverdue_filtered)
# display(union_df.filter(col("segment") == lit("ARIAFTA")).distinct().count())

In [0]:
# from pyspark.sql.functions import col

# tables_df = spark.sql("SHOW TABLES IN hive_metastore.ariadm_arm_fta")
# display(tables_df.filter(col("tableName").like("bronze_%")))