## Audit Configuration

In [0]:
import dlt
import json
from pyspark.sql.functions import *
from pyspark.sql.types import *
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from pyspark.sql.window import Window
import uuid
from delta.tables import DeltaTable

In [0]:
# Earlier audit mount location, left commented out for reference.
# audit_mnt = "/mnt/ingest00curatedsboxsilver/ARIADM/ARM/AUDIT/TD"
# Storage path of the Delta table that the audit rows are appended to.
audit_delta_path = "/mnt/ingest00curatedsboxsilver/ARIADM/ARM/AUDIT/APPEALS/ARIAUTA/apl_uta_cr_audit_table"
# Hive metastore schema whose tables are audited (read as hive_metastore.<hive_schema>.<Table_Name>).
hive_schema = "ariadm_arm_uta"

In [0]:


def datetime_uuid():
    """
    Build a run identifier from the current UTC time.

    The current UTC timestamp is rendered to whole seconds and hashed with
    UUID version 5 under the DNS namespace, so the result is deterministic
    for a given second: two calls within the same second return the same id.

    :return: The UUID rendered as a string.
    """
    timestamp_text = f"{datetime.utcnow():%Y-%m-%d %H:%M:%S}"
    run_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, timestamp_text)
    return str(run_uuid)

# Single run identifier stamped on every audit row produced by this notebook run.
run_id_value = datetime_uuid()

# Schema of the audit Delta table; every field is nullable.
audit_schema = StructType([
    StructField(field_name, field_type, True)
    for field_name, field_type in (
        ("Run_Id", StringType()),
        ("Unique_Identifier_Desc", StringType()),
        ("Unique_Identifier", StringType()),
        ("Table_Name", StringType()),
        ("Stage_Name", StringType()),
        ("Record_Count", IntegerType()),
        ("Run_DateTime", TimestampType()),
        ("Batch_Id", StringType()),
        ("Description", StringType()),
        ("File_Name", StringType()),
        ("Status", StringType()),
    )
])

def create_audit_df(df: DataFrame, Unique_Identifier_Desc: str,Table_Name: str, Stage_Name: str, Description: str, File_Name = False,status = False) -> DataFrame:
    """
    Build the audit DataFrame for one table.

    One output row is produced per distinct combination of the audited
    columns, with Record_Count holding how many input rows share it.
    Nothing is written here; the caller is responsible for persisting
    the returned DataFrame.

    :param df: Input DataFrame; must contain the column named by
        Unique_Identifier_Desc, plus "File_Name" / "Status" when the
        corresponding flag is set.
    :param Unique_Identifier_Desc: Name of the column used as the unique
        identifier. Its values become "Unique_Identifier" and the name itself
        is stored in "Unique_Identifier_Desc".
    :param Table_Name: Name of the source table.
    :param Stage_Name: Name of the data processing stage.
    :param Description: Description of the table.
    :param File_Name: Pass True to carry the input's "File_Name" column
        into the audit rows.
    :param status: Pass True to carry the input's "Status" column into the
        audit rows.
    :return: DataFrame with the audit columns plus an integer Record_Count.
    """

    # Captured once so every row built by this call shares the same timestamp.
    run_datetime = datetime.utcnow()

    # Only the literal True enables an optional column, matching existing callers.
    optional_columns = []
    if File_Name is True:
        optional_columns.append(col("File_Name"))
    if status is True:
        optional_columns.append(col("Status"))

    audit_df = (
        df.select(col(Unique_Identifier_Desc).alias("Unique_Identifier"), *optional_columns)
        .withColumn("Run_Id", lit(run_id_value))
        .withColumn("Unique_Identifier_Desc", lit(Unique_Identifier_Desc))
        .withColumn("Stage_Name", lit(Stage_Name))
        .withColumn("Table_Name", lit(Table_Name))
        .withColumn("Run_DateTime", lit(run_datetime).cast(TimestampType()))
        .withColumn("Description", lit(Description))
    )

    # Grouping by every audit column collapses duplicates into a single row
    # whose Record_Count is the number of collapsed input rows.
    final_audit_df = audit_df.groupBy(*audit_df.columns).agg(
        count("*").cast(IntegerType()).alias("Record_Count")
    )

    return final_audit_df

In [0]:
# Make sure the audit Delta table exists at audit_delta_path before any rows
# are appended; when it is missing, create it empty with the audit schema.
if DeltaTable.isDeltaTable(spark, audit_delta_path):
    print(f"⚡ Delta table '{audit_delta_path}' already exists.")
else:
    print(f"🛑 Delta table '{audit_delta_path}' does not exist. Creating an empty Delta table...")

    # A zero-row DataFrame carrying audit_schema is enough to materialise the table.
    empty_df = spark.createDataFrame([], audit_schema)
    empty_writer = empty_df.write.format("delta").mode("overwrite")
    empty_writer.save(audit_delta_path)

    print("✅ Empty Delta table successfully created in Azure Storage.")

In [0]:
# Audit definitions for the bronze-stage tables.
# Each entry supplies the identifier column(s), the table name to read from the
# Hive schema, the stage label and a free-text description; all four values are
# written into the audit rows.
audit_params_bronze = [
        {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_appealcase_cr_cs_ca_fl_cres_mr_res_lang",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_appealcase_cr_cs_ca_fl_cres_mr_res_lang Delta Live Table combining Appeal Case data with Case Respondent, Main Respondent, Respondent, File Location, Case Representative, Representative, and Language.."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_appealcase_ca_apt_country_detc",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_appealcase_ca_apt_country_detc Delta Live Table combining Case Appellant data with Appellant, Detention Centre, and Country information.."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_appealcase_cl_ht_list_lt_hc_c_ls_adj",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_appealcase_cl_ht_list_lt_hc_c_ls_adj Delta Live Table combining Status, Case List, Hearing Type, Adjudicator, Court, and other related details."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_appealcase_bfdiary_bftype",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_appealcase_bfdiary_bftype Delta Live Table combining BFDiary and BFType details.."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_appealcase_history_users",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_appealcase_history_users Delta Live Table combining History and Users details"
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_appealcase_link_linkdetail",
        "Stage_Name": "bronze_stage",
        # NOTE(review): the wording after the table name repeats the
        # bronze_appealcase_ca_apt_country_detc description - looks copy-pasted; confirm.
        "Description": "The bronze_appealcase_link_linkdetail Delta Live Table combining Case Appellant data with Appellant, Detention Centre, and Country information.."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_appealcase_status_sc_ra_cs",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_appealcase_status_sc_ra_cs Delta Live Table joining Status, CaseStatus, StatusContact, ReasonAdjourn, Language, and DecisionType details."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_appealcase_appealcatagory_catagory",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_appealcase_appealcatagory_catagory Delta Live Table for joining AppealCategory and Category tables to retrieve case and category details."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_appealcase_p_e_cfs_prr_fs_cs_hc_ag_at",
        "Stage_Name": "bronze_stage",
        # NOTE(review): the description names bronze_appealcase_t_tt_ts_tm, not this
        # entry's Table_Name - confirm which text belongs here.
        "Description": "The bronze_appealcase_t_tt_ts_tm Delta Live Table for joining Transaction, TransactionType, TransactionStatus, and TransactionMethod tables to retrieve transaction details."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_status_decisiontype",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_status_decisiontype Delta Live Table for joining Status and DecisionType tables to retrieve case and decision type details."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_appealcase_t_tt_ts_tm",
        "Stage_Name": "bronze_stage",
        # NOTE(review): the wording after the table name repeats the
        # bronze_appealcase_ca_apt_country_detc description - looks copy-pasted; confirm.
        "Description": "The bronze_appealcase_t_tt_ts_tm Delta Live Table combining Case Appellant data with Appellant, Detention Centre, and Country information.."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_appealcase_ahr_hr",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_appealcase_ahr_hr Delta Live Table for joining AppealHumanRight and HumanRight tables to retrieve case and human rights details."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_appealcase_anm_nm",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_appealcase_anm_nm Delta  Live Table for joining AppealNewMatter and NewMatter tables to retrieve appeal and new matter details."
    }

    ,
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_appealcase_dr_rd",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_appealcase_dr_rd Delta Live Table for joining DocumentsReceived and ReceivedDocument tables to retrieve document details."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_appealcase_rsd_sd",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_appealcase_rsd_sd Delta Live Table for joining ReviewStandardDirection and StandardDirection tables to retrieve review standard direction details."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_review_specific_direction",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_review_specific_direction Delta Live Table for retrieving details from the ReviewSpecificDirection table."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_cost_award",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_cost_award Delta Live Table for retrieving details from the CostAward table."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_cost_award_linked",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_cost_award_linked Delta Live Table for retrieving details from the CostAward_linked table."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_costorder",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_costorder Delta Live Table for retrieving details from the CostOrder table."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_hearing_points_change_reason",
        "Stage_Name": "bronze_stage",
        "Description": "The bronze_hearing_points_change_reason Delta Live Table for retrieving details from the HearingPointsChangeReason table"
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_appeal_type_category",
        "Stage_Name": "bronze_stage",
        # NOTE(review): the description ends with stray text ("..ble."); this string is
        # written to the audit table as-is.
        "Description": "The bronze_appeal_type_category Delta Live Table for retrieving details from the AppealTypeCategory table..ble."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_appeal_grounds",
        "Stage_Name": "bronze_stage",
        # NOTE(review): the description refers to the AppealTypeCategory table although the
        # entry is bronze_appeal_grounds - confirm.
        "Description": "The bronze_appeal_grounds Delta Live Table for retrieving details from the AppealTypeCategory table.."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_required_incompatible_adjudicator",
        "Stage_Name": "bronze_stage",
        # NOTE(review): the description talks about Appeal Grounds; presumably copied from
        # another entry - confirm.
        "Description": "The bronze_required_incompatible_adjudicator Delta Live Table for retrieving Appeal Grounds with Appeal Type Descriptions.."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "bronze_case_adjudicator",
        "Stage_Name": "bronze_stage",
        # NOTE(review): the description talks about Appeal Grounds; presumably copied from
        # another entry - confirm.
        "Description": "The bronze_case_adjudicator Delta Live Table for retrieving Appeal Grounds with Appeal Type Descriptions.."
    }
]

In [0]:
# Audit definitions for the segmentation-stage (stg_*_filtered) tables.
# Entries use the same keys as the other stage lists: identifier column(s),
# table name, stage label and description.
audit_params_segmentation = [
        {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "stg_appealcasestatus_filtered",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the stg_appealcasestatus_filtered table segmentation logic to identify distinct Appeals categories so in later stages we can identify the distinct appeal categories."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "stg_firsttier_filtered",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the stg_firsttier_filtered table to Identify cases 'FT Retained - ARM catagory' and mark is as ARIAUTA(FirstTierApplication)"
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "stg_skeleton_filtered",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the  skeleton cases using the segmentation logic-and mark is as ARIAUTA(FirstTierApplication)"
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "stg_uppertribunalretained_filtered",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the stg_uppertribunalretained_filtered table to Identify cases 'UT Retained' catagory and mark is as ARIAUTA(UpperTribinalApplication)"
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "stg_firsttieroverdue_filtered",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the stg_firsttieroverdue_filtered table to Identify cases 'FT Overdue' - ARM catagory and mark is as ARIAUTA(FirstTierApplication)"
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "stg_uppertribunaloverdue_filtered",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the stg_uppertribunaloverdue_filtered table to Identify cases 'UT Overdue' - ARM catagory and mark is as ARIAUTA(UpperTribinalApplication)"
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "stg_filepreservedcases_filtered",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the stg_filepreservedcases_filtered table to Identify cases with CaseType = 1 and DeptID == 520 and make is as 'ARIAFilePreservedCases' - ARM catagory and mark is as ARIAUTA(UpperTribinalApplication)"
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "stg_appeals_filtered",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the stg_appeals_filtered table to Identify cases  ARIAUTA(FirstTierApplication)"
    }
    
    ]


In [0]:
# Audit definitions for the silver-stage tables.
# Entries use the same keys as the other stage lists. The last entry is keyed on
# client_identifier rather than CaseNo.
audit_params_silver = [
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_appealcase_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_appealcase_detail table contains Appeal Case details derived from bronze_appealcase_cr_cs_ca_fl_cres_mr_res_lang, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_applicant_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_applicant_detail table contains case applicant details derived from bronze_appealcase_ca_apt_country_detc, filtered by Appeal Category and where CaseAppellantRelationship is null, excluding dependents."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_dependent_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_dependent_detail table contains case applicant dependents details derived from bronze_appealcase_ca_apt_country_detc, filtered by Appeal Category and where CaseAppellantRelationship is not null, excluding applicants."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_list_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_list_detail table contains list details derived from bronze_appealcase_cl_ht_list_lt_hc_c_ls_adj, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        # NOTE(review): "dfdairy" may be a misspelling of "bfdiary"; the value must match
        # the real table name, so verify against the metastore before changing it.
        "Table_Name": "silver_dfdairy_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_dfdairy_detail table contains BFDiary details derived from bronze_appealcase_bfdiary_bftype, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_history_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_history_detail table contains history details derived from bronze_appealcase_history_users, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_link_detail",
        "Stage_Name": "silver_stage",
        # NOTE(review): the description cites bronze_appealcase_history_users as the source
        # of link details - presumably copy-pasted; confirm the real source table.
        "Description": "The silver_link_detail table contains link details derived from bronze_appealcase_history_users, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_status_detail",
        "Stage_Name": "silver_stage",
        # NOTE(review): the description cites bronze_appealcase_appealcatagory_catagory as the
        # source of status details - confirm the real source table.
        "Description": "The silver_status_detail table contains status details derived from bronze_appealcase_appealcatagory_catagory, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_appealcategory_detail",
        "Stage_Name": "silver_stage",
        # NOTE(review): the description cites bronze_appealcase_history_users as the source
        # of appeal category details - confirm the real source table.
        "Description": "The silver_appealcategory_detail table contains appeal category details derived from bronze_appealcase_history_users, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_case_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_case_detail table contains case details derived from bronze_appealcase_p_e_cfs_prr_fs_cs_hc_ag_at, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_statusdecisiontype_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_statusdecisiontype_detail table contains status decision type details derived from bronze_status_decisiontype, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_transaction_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_transaction_detail table contains transaction details derived from bronze_appealcase_t_tt_ts_tm, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_humanright_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_humanright_detail table contains human rights details derived from bronze_appealcase_ahr_hr, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_newmatter_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_newmatter_detail table contains new matter details derived from bronze_appealcase_anm_nm, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_documents_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_documents_detail table contains document details derived from bronze_appealcase_dr_rd, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        # NOTE(review): spelled "sliver" while every other entry uses "silver"; the value must
        # match the real table name, so verify against the metastore before changing it.
        "Table_Name": "sliver_direction_detail",
        "Stage_Name": "silver_stage",
        "Description": "The sliver_direction_detail table contains direction details derived from bronze_appealcase_rsd_sd, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_reviewspecificdirection_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_reviewspecificdirection_detail table contains review-specific direction details derived from bronze_review_specific_direction, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_linkedcostaward_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_linkedcostaward_detail table contains linked cost award details derived from bronze_cost_award_linked, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_costaward_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_costaward_detail table contains cost award details derived from bronze_cost_award, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_costorder_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_costorder_detail table contains cost order details derived from bronze_costorder, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_hearingpointschange_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_hearingpointschange_detail table contains hearing points change reason details derived from bronze_hearing_points_change_reason, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_hearingpointshistory_detail",
        "Stage_Name": "silver_stage",
        # NOTE(review): the description cites bronze_hearing_points_history, which has no
        # entry in the bronze audit list of this notebook - confirm whether one is missing.
        "Description": "The silver_hearingpointshistory_detail table contains hearing points history details derived from bronze_hearing_points_history, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_appealtypecategory_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_appealtypecategory_detail table contains appeal type category details derived from bronze_appeal_type_category, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_appealgrounds_detail",
        "Stage_Name": "silver_stage",
        "Description": "The silver_appealgrounds_detail table contains appeal grounds details derived from bronze_appeal_grounds, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_required_incompatible_adjudicator",
        "Stage_Name": "silver_stage",
        "Description": "The silver_required_incompatible_adjudicator table contains required incompatible adjudicator details derived from bronze_required_incompatible_adjudicator, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "silver_case_adjudicator",
        "Stage_Name": "silver_stage",
        "Description": "The silver_case_adjudicator table contains adjudicator details derived from bronze_case_adjudicator, filtered by Appeal Category."
    },
    {
        "Unique_Identifier_cols": ["client_identifier"],
        "Table_Name": "silver_archive_metadata",
        "Stage_Name": "silver_stage",
        # NOTE(review): the description cites bronze_case_adjudicator as the source of
        # archive metadata - presumably copy-pasted; confirm.
        "Description": "The silver_archive_metadata table contains archive metadata derived from bronze_case_adjudicator, filtered by Appeal Category."
    }
]

In [0]:
# Audit definitions for the gold-output staging and gold tables.
# Besides the common keys, some entries carry "extra_columns", naming the source
# columns that should be copied into the audit rows.
# NOTE(review): every entry here, including the gold_* tables, is labelled
# "segmentation_stage" - presumably inherited from the segmentation list; confirm
# whether a gold-specific stage label is intended before changing stored values.
# NOTE(review): extra_columns spells the column "File_name", whereas the audit
# schema field is "File_Name" - confirm the intended casing.
audit_params_gold = [
        {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "stg_statichtml_data",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the stg_statichtml_data staging table is used to derive static HTML with a one-to-one column mapping. that are applicable for gold outputs"
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "stg_statusdetail_data",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the stg_statusdetail_data staging table derives HTML data for status details in nested tabs in HTML output."
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "stg_apl_combined",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the stg_apl_combined unified stage created all consolidated data"
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "stg_apl_create_json_content",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the stg_apl_create_json_content staging table derives JSON data content.",
        "extra_columns": ["File_name", "Status"]
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "stg_apl_create_html_content",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the stg_apl_create_html_content staging table derives HTML data content.",
        "extra_columns": ["File_name", "Status"]
    },
    {
        "Unique_Identifier_cols": ["client_identifier"],
        "Table_Name": "stg_apl_create_a360_content",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the stg_apl_create_a360_content staging table derives A360 data content.",
        "extra_columns": ["File_name", "Status"]
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "stg_appeals_unified",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the stg_appeals_unified  table consolidates all silver data, including HTML, JSON, and A360 content, along with its status.",
        "extra_columns": ["File_name", "Status"]
    }
    ,
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "gold_appeals_with_json",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the gold_appeals_with_json  table to upload JSON gold outputs.",
        "extra_columns": ["File_name", "Status"]
    },
    {
        "Unique_Identifier_cols": ["CaseNo"],
        "Table_Name": "gold_appeals_with_html",
        "Stage_Name": "segmentation_stage",
        "Description": "This is the gold_appeals_with_HTML  table to upload HTML gold outputs.",
        "extra_columns": ["File_name", "Status"]
    },
    {
        "Unique_Identifier_cols": ["A360_BatchId"],
        "Table_Name": "gold_appeals_with_a360",
        "Stage_Name": "segmentation_stage",
        # NOTE(review): the description says "JSON gold outputs" for the a360 table -
        # presumably copied from the JSON entry; confirm.
        "Description": "This is the gold_appeals_with_a360  table to upload JSON gold outputs.",
        "extra_columns": ["File_name", "Status"]
    }
    ]

In [0]:
# Combined work list for the audit loop: bronze, segmentation, silver, then gold entries.
audit_params = audit_params_bronze + audit_params_segmentation + audit_params_silver + audit_params_gold
# Bare expression so the notebook displays the combined list as the cell output.
audit_params

In [0]:
# Build one audit DataFrame per configured table, then union them into a single
# DataFrame (df_final_audit). A table that cannot be processed contributes one
# "Failed" row instead of aborting the whole audit.
audit_dataframes = []

for params in audit_params:
    Table_Name = params["Table_Name"]
    Stage_Name = params.get("Stage_Name", None)
    Unique_Identifier_cols = params["Unique_Identifier_cols"]
    Description = params["Description"]
    extra_columns = params.get("extra_columns", [])
    Unique_Identifier_Desc = "_".join(Unique_Identifier_cols)

    # Stays None until the table has been read, so the handler below can tell
    # "table could not be read" apart from a failure while building the audit.
    df_logging = None

    try:
        df_logging = spark.read.table(f"hive_metastore.{hive_schema}.{Table_Name}")

        # The identifier is always audited as a string; composite keys are
        # joined with "_" into a column named after the joined key names.
        if len(Unique_Identifier_cols) > 1:
            df_audit = df_logging.withColumn(
                Unique_Identifier_Desc,
                concat_ws("_", *[col(c).cast("string") for c in Unique_Identifier_cols])
            )
        else:
            df_audit = df_logging.withColumn(Unique_Identifier_Desc, col(Unique_Identifier_Desc).cast("string"))

        # File_Name / Status are only taken from the source table when the entry
        # declares them in extra_columns. Otherwise a NULL string placeholder is
        # used, so every audit DataFrame has the same typed columns. The comparison
        # is case-insensitive because the config spells the column "File_name".
        declared_extra_columns = {c.lower() for c in extra_columns}
        for audit_col in ["File_Name", "Status"]:
            if audit_col.lower() not in declared_extra_columns:
                df_audit = df_audit.withColumn(audit_col, lit(None).cast("string"))

        # Generate the audit DataFrame
        df_audit_appended = create_audit_df(
            df_audit,
            Unique_Identifier_Desc=Unique_Identifier_Desc,
            Table_Name=Table_Name,
            Stage_Name=Stage_Name,
            Description=Description,
            File_Name = True,
            status = True
        )

        audit_dataframes.append(df_audit_appended)

        print(f"✅ Successfully processed table: {Table_Name}")

    except Exception as e:
        print(f"🛑 Failed to process table: {Table_Name}. Error: {str(e)}")

        if df_logging is None:
            # The read itself failed: keep the established status text.
            status = f"Failed - Table {Table_Name} does not exist"
        else:
            # The table was read but a later step failed (e.g. a missing column).
            # Record the real cause, trimmed to its first line to keep Status short.
            error_summary = (str(e).splitlines() or [""])[0][:500]
            status = f"Failed - {type(e).__name__}: {error_summary}"

        row_data = {
            "Run_Id": run_id_value,
            "Unique_Identifier_Desc": Unique_Identifier_Desc,
            "Unique_Identifier": None,
            "Table_Name": Table_Name,
            "Stage_Name": Stage_Name,
            "Record_Count": 0,
            # UTC, so failure rows use the same clock as the rows built by create_audit_df.
            "Run_DateTime": datetime.utcnow(),
            "Batch_Id": None,
            "Description": Description,
            "File_Name": None,
            "Status": status
        }

        row_df = spark.createDataFrame([row_data], schema=audit_schema)
        audit_dataframes.append(row_df)


# Union by column name: success frames lack Batch_Id and order their columns
# differently from the failure rows, so missing columns are filled with NULLs.
# An empty work list yields an empty DataFrame instead of an IndexError.
df_final_audit = audit_dataframes[0] if audit_dataframes else spark.createDataFrame([], audit_schema)
for df in audit_dataframes[1:]:
    df_final_audit = df_final_audit.unionByName(df, allowMissingColumns=True)

In [0]:
# Append this run's audit rows to the audit Delta table; mergeSchema lets the
# write add columns that are not yet part of the table's schema.
df_final_audit.write.format("delta").mode("append").option("mergeSchema", "true").save(audit_delta_path)

In [0]:
# End the notebook run here and hand this message back as its exit value;
# the appendix cells below are therefore not executed by a full run.
dbutils.notebook.exit("Notebook completed successfully")

## Appendix

In [0]:
# df_final_audit.createOrReplaceTempView("tv_final_audit")



In [0]:
# %sql
# select * from tv_final_audit
# where Table_Name like 'gold%'

In [0]:
# audit_tables = [param["Table_Name"] for param in audit_params]
# display(len(audit_tables))
# print(audit_tables)

In [0]:

# distinct_Table_Names = spark.sql("""select array_distinct(collect_list(Table_Name)) as distinct_Table_Names from tv_final_audit""").first()["distinct_Table_Names"]

# display(len(distinct_Table_Names))
# print(distinct_Table_Names)


In [0]:
# diff_tables = list(set(audit_tables) - set(distinct_Table_Names))
# diff_tables

In [0]:
# %sql
# select Table_Name,runid, count(*) from tv_final_audit
# --  where Table_Name like 'stg%'
# group by all

In [0]:
# stg_firsttier_filtered = spark.table("hive_metastore.ariadm_arm_fta.stg_firsttier_filtered")
# stg_skeleton_filtered = spark.table("hive_metastore.ariadm_arm_fta.stg_skeleton_filtered")
# stg_firsttieroverdue_filtered = spark.table("hive_metastore.ariadm_arm_fta.stg_firsttieroverdue_filtered")

# union_df = stg_firsttier_filtered.union(stg_skeleton_filtered).union(stg_firsttieroverdue_filtered)
# display(union_df.filter(col("segment") == lit("ARIAFTA")).distinct().count())

In [0]:
# from pyspark.sql.functions import col

# tables_df = spark.sql("SHOW TABLES IN hive_metastore.ariadm_arm_fta")
# display(tables_df.filter(col("tableName").like("bronze_%")))

In [0]:
# spark.read.format("delta").load(audit_delta_path).select("Table_Name").distinct().display()