In [0]:
import dlt
import json
from pyspark.sql.functions import *
from pyspark.sql.types import *
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from pyspark.sql.window import Window
import uuid
from delta.tables import DeltaTable

In [0]:
## Set up configs

In [0]:
# Load the pipeline configuration. `first()` triggers a Spark action, so the
# row is fetched once and reused for every setting instead of re-reading the
# file for each value.
config = spark.read.option("multiline", "true").json("dbfs:/configs/config.json")
config_row = config.first()
if config_row is None:
    # An empty config would otherwise surface as an opaque TypeError below.
    raise ValueError("dbfs:/configs/config.json contains no configuration record")

# Both values are normalised (trimmed, lower-cased) because they are embedded
# in resource names further down.
env = config_row["env"].strip().lower()
lz_key = config_row["lz_key"].strip().lower()

In [0]:
# Secret scope name, built from the landing-zone key and environment; it is
# passed as `scope` to dbutils.secrets.get when the credentials are fetched.
keyvault_name = f"ingest{lz_key}-meta002-{env}"

In [0]:
# Fetch the service principal credentials (client secret, tenant id, client id)
# from the secret scope. The key names keep the "PRINCIPLE" spelling on purpose:
# they are lookup keys, so they must not be "corrected" here.
client_secret = dbutils.secrets.get(scope=keyvault_name, key='SERVICE-PRINCIPLE-CLIENT-SECRET')
tenant_id = dbutils.secrets.get(scope=keyvault_name, key='SERVICE-PRINCIPLE-TENANT-ID')
client_id = dbutils.secrets.get(scope=keyvault_name, key='SERVICE-PRINCIPLE-CLIENT-ID')

In [0]:
# Curated storage account name, built from the landing-zone key and environment.
curated_storage_account = f"ingest{lz_key}curated{env}"

# Container name used when building the audit Delta table path.
silver_curated_container = "silver"

In [0]:
# Storage accounts that need OAuth client-credential settings on this Spark session.
storage_accounts = [curated_storage_account]

for storage_account in storage_accounts:
    # One (setting name, value) pair per ABFS OAuth option for this account.
    configs = dict([
        (f"fs.azure.account.auth.type.{storage_account}.dfs.core.windows.net", "OAuth"),
        (f"fs.azure.account.oauth.provider.type.{storage_account}.dfs.core.windows.net",
         "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"),
        (f"fs.azure.account.oauth2.client.id.{storage_account}.dfs.core.windows.net", client_id),
        (f"fs.azure.account.oauth2.client.secret.{storage_account}.dfs.core.windows.net", client_secret),
        (f"fs.azure.account.oauth2.client.endpoint.{storage_account}.dfs.core.windows.net",
         f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"),
    ])
    # Apply every setting to the active Spark session.
    for key, val in configs.items():
        spark.conf.set(key, val)

In [0]:
# Print the configured auth type for each storage account to confirm the
# settings were applied. 'MISSING' is printed when the key has not been set.
# Only the auth-type key is read here; the client secret is never printed.
for storage_account in storage_accounts:
    key = f"fs.azure.account.auth.type.{storage_account}.dfs.core.windows.net"
    print(f"{key}: {spark.conf.get(key, 'MISSING')}")

In [0]:
# ABFSS location of the audit Delta table, inside the silver container of the
# curated storage account. Audit rows are appended to (and read back from) this path.
audit_delta_path = f"abfss://{silver_curated_container}@{curated_storage_account}.dfs.core.windows.net/ARIADM/ARM/AUDIT/SBAILS/bl_cr_audit_table"

In [0]:
# Commented-out mount-based alternative for the audit path (not active; note it
# points at .../AUDIT/BAILS/... rather than .../AUDIT/SBAILS/...):
# audit_delta_path = "/mnt/ingest00curatedsboxsilver/ARIADM/ARM/AUDIT/BAILS/bl_cr_audit_table"

# Schema in hive_metastore from which the audited tables are read.
hive_schema = "aria_sbails"

In [0]:


def datetime_uuid():
    """
    Build a run identifier from the current UTC time.

    The timestamp is formatted at one-second resolution and hashed into a
    name-based (version 5) UUID in the DNS namespace, so two calls made within
    the same second return the same identifier.

    :return: The UUID rendered as a string.
    """
    now_utc = datetime.utcnow()
    seed = now_utc.strftime("%Y-%m-%d %H:%M:%S")
    run_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, seed)
    return str(run_uuid)

# Single run identifier stamped on every audit row written by this notebook run.
run_id_value = datetime_uuid()

# Schema of the audit Delta table; every column is nullable.
audit_schema = StructType([
    StructField(column_name, column_type, True)
    for column_name, column_type in (
        ("Run_Id", StringType()),
        ("Unique_Identifier_Desc", StringType()),
        ("Unique_Identifier", StringType()),
        ("Table_Name", StringType()),
        ("Stage_Name", StringType()),
        ("Record_Count", IntegerType()),
        ("Run_DateTime", TimestampType()),
        ("Batch_Id", StringType()),
        ("Description", StringType()),
        ("File_Name", StringType()),
        ("Status", StringType()),
    )
])

def create_audit_df(df: DataFrame, Unique_Identifier_Desc: str,Table_Name: str, Stage_Name: str, description: str, File_Name = False,status = False,batchid = False) -> DataFrame:
    """
    Build per-identifier audit rows for a table and append them to the audit Delta table.

    One row is produced for each distinct combination of the unique identifier
    and the selected optional columns, together with the number of source
    records sharing that combination (``Record_Count``).

    :param df: Input DataFrame from which the unique identifiers are extracted.
    :param Unique_Identifier_Desc: Name of the column in ``df`` that acts as the
        unique identifier; its values are written as ``Unique_Identifier`` and
        the name itself as ``Unique_Identifier_Desc``.
    :param Table_Name: Name of the source table, stored on every audit row.
    :param Stage_Name: Name of the data processing stage, stored on every audit row.
    :param description: Description of the table, stored on every audit row.
    :param File_Name: Boolean flag. When exactly ``True``, the ``File_Name``
        column of ``df`` is carried into the audit rows. Default is False.
    :param status: Boolean flag. When exactly ``True``, the ``Status`` column of
        ``df`` is carried into the audit rows. Default is False.
    :param batchid: Boolean flag. When exactly ``True``, the ``batchid`` column
        of ``df`` is carried into the audit rows as ``Batch_Id``. Default is False.
    :return: The aggregated audit DataFrame that was appended to ``audit_delta_path``.
    """

    dt_desc = datetime.utcnow()

    # Optional columns are only selected when the flag is literally True, so a
    # truthy non-boolean value does not enable them.
    optional_columns = []
    if File_Name is True:
        optional_columns.append(col("File_Name"))
    if status is True:
        optional_columns.append(col("Status"))
    if batchid is True:
        # The audit schema declares this column as the string `Batch_Id`;
        # align name and type so it lands in that column rather than adding a
        # separate `batchid` column through mergeSchema.
        optional_columns.append(col("batchid").cast(StringType()).alias("Batch_Id"))

    audit_df = (
        df.select(col(Unique_Identifier_Desc).alias("Unique_Identifier"), *optional_columns)
        .withColumn("Run_Id", lit(run_id_value))
        .withColumn("Unique_Identifier_Desc", lit(Unique_Identifier_Desc))
        .withColumn("Stage_Name", lit(Stage_Name))
        .withColumn("Table_Name", lit(Table_Name))
        .withColumn("Run_DateTime", lit(dt_desc).cast(TimestampType()))
        .withColumn("Description", lit(description))
    )

    # Grouping by every column collapses duplicate rows into one row carrying
    # the number of source records it represents.
    final_audit_df = audit_df.groupBy(*audit_df.columns).agg(
        count("*").cast(IntegerType()).alias("Record_Count")
    )

    # mergeSchema lets the append succeed when the DataFrame's column set
    # differs from the columns already present in the audit table.
    final_audit_df.write.format("delta").mode("append").option("mergeSchema","true").save(audit_delta_path)

    return final_audit_df

In [0]:
# Make sure the audit Delta table exists at audit_delta_path before any audit
# rows are appended to it.
if DeltaTable.isDeltaTable(spark, audit_delta_path):
    print(f"⚡ Delta table '{audit_delta_path}' already exists.")
else:
    print(f"🛑 Delta table '{audit_delta_path}' does not exist. Creating an empty Delta table...")

    # Writing a zero-row DataFrame with the audit schema creates the table.
    empty_df = spark.createDataFrame([], audit_schema)
    empty_df.write.format("delta").mode("overwrite").save(audit_delta_path)

    print("✅ Empty Delta table successfully created in Azure Storage.")

In [0]:
# One entry per table to audit. Required keys:
#   Unique_Identifier_cols - column whose values identify a record in the table
#   Table_Name             - table name inside the hive schema
#   Stage_Name             - pipeline stage the table belongs to
#   description            - free-text description stored on every audit row
# Optional boolean flags (absent means False): "File_name", "status", "batchid".
audit_params = [
    ### Bronze tables ###
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "bronze_sbail_ac_cr_cs_ca_fl_cres_mr_res_lang",
        "Stage_Name": "bronze_stage",
        "description": "The bronze_sbail_ac_cr_cs_ca_fl_cres_mr_res_lang table consolidates key details of appeal cases, including case numbers, court preferences, hearing dates, language requirements, and respondents, alongside associated embassy, representative, and cost award information. It joins multiple tables to provide a comprehensive dataset for tracking legal proceedings and related entities"
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "bronze_sbail_ac_ca_apt_country_detc",
        "Stage_Name": "bronze_stage",
        "description": "The bronze_sbail_ac_ca_apt_country_detc table consolidates key details related to appellants in bail cases. It joins data from multiple sources, including case appellant records, appellant personal details, detention centre information, and country data. The table provides a comprehensive view of the appellant’s identity, detention status, and nationality, linking them to their respective case numbers. Key attributes include appellant name, birth date, address, telephone, email, detention centre details, and country information, ensuring a structured dataset for tracking individuals involved in bail proceedings."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "bronze_sbail_ac_cl_ht_list_lt_hc_c_ls_adj",
        "Stage_Name": "bronze_stage",
        "description": "The bronze_sbail_ac_cl_ht_list_lt_hc_c_ls_adj table consolidates information on case statuses, hearings, listings, courts, adjudicators, and appeal outcomes in bail cases. It integrates data from multiple sources, including case lists, hearing types, list types, courts, hearing centres, list sittings, adjudicators, and decision types, providing a structured view of legal proceedings. Key attributes include case status, hearing date, list type, court name, hearing centre, adjudicator details, and appeal case notes, enabling efficient tracking of hearing schedules and case outcomes"
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "bronze_sbail_ac_bfdiary_bftype",
        "Stage_Name": "bronze_stage",
        "description": "The bronze_sbail_ac_bfdiary_bftype table consolidates information on BF (Bring Forward) diary entries related to bail cases. It links BF diary records with their corresponding BF types, providing a structured overview of case follow-ups and deadlines. Key attributes include case number, BF date, entry details, completion date, and BF type description, enabling efficient tracking of pending and completed case actions."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "bronze_sbail_ac_history_users",
        "Stage_Name": "bronze_stage",
        "description": "The bronze_sbail_ac_history_users table captures a detailed history of actions taken on bail cases, linking historical records with user information. It integrates data from the history and users tables, providing insights into case modifications, timestamps, action types, and user activity. Key attributes include case number, history ID, action date, history type, comments, deleted by information, and the full name of the user responsible for the action, ensuring a transparent audit trail of case updates"
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "bronze_sbail_ac_link_linkdetail",
        "Stage_Name": "bronze_stage",
        "description": "The bronze_sbail_ac_link_linkdetail table captures case linkages and related appellant details within bail cases. It integrates data from case links, link details, and appellant records, providing insights into case associations and relevant individuals. Key attributes include link number, case number, appellant’s name, title, and any associated comments from link details, facilitating the tracking of case relationships and appellant connections"
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "bronze_sbail_status_sc_ra_cs",
        "Stage_Name": "bronze_stage",
        "description": "The bronze_sbail_status_sc_ra_cs table consolidates information related to case statuses, contacts, adjournment reasons, language preferences, list types, hearing types, and judiciary assignments."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "bronze_sbail_ac_appealcategory_category",
        "Stage_Name": "bronze_stage",
        "description": "The bronze_sbail_ac_appealcategory_category table records appeal categories and their associated details for bail cases. It integrates data from appeal category records and general category descriptions, allowing for a structured view of case classifications."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "bronze_case_surety_query",
        "Stage_Name": "bronze_stage",
        "description": "The bronze_case_surety_query table captures surety details related to bail cases, providing insights into individuals or entities offering financial guarantees for an appellant's release. It includes surety identity, contact details, financial commitments (recognizance and security amounts), lodging dates, solicitor involvement, and communication details."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "judicial_requirement",
        "Stage_Name": "bronze_stage",
        "description": "The judicial_requirement table tracks judicial assignments and requirements for bail cases. It links case adjudicators to their assigned judges, providing a structured view of judicial oversight in legal proceedings. Key attributes include case number, adjudicator requirements, and judge details (surname, forenames, and title), ensuring visibility into judicial involvement and case management."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "linked_cases_cost_award",
        "Stage_Name": "bronze_stage",
        "description": "The linked_cases_cost_award table consolidates cost awards and their associations with linked cases, appellants, and appeal stages. It provides a structured overview of financial awards in legal proceedings, linking them to case numbers, appellants, appeal stages, and case statuses."
    },
    ### Segmentation tables ###
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "silver_normal_sbail",
        "Stage_Name": "segmentation_stage",
        "description": "The silver_normal_sbail table processes and filters normal bail cases from raw datasets, refining the data for analytical and reporting purposes. It identifies archivable cases by determining the most recent status record for each case, filtering out specific file locations, and applying conditions based on case history comments and decision dates. The table ensures that only archivable normal bail cases are retained, providing structured insights into case numbers and bail type classifications."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "silver_legal_hold_normal_sbail",
        "Stage_Name": "segmentation_stage",
        "description": "The silver_legal_hold_normal_sbail table identifies normal bail cases under legal hold by analyzing case history and file location data. It filters cases where indefinite retention is indicated in the history comments while excluding specific file locations. The table ensures that only cases meeting the legal hold criteria are retained, providing a structured dataset with case numbers and a classification of 'BailLegalHold' for tracking and auditing purposes."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "silver_sbail_combined_segmentation_nb_lhnb",
        "Stage_Name": "segmentation_stage",
        "description": "The silver_sbail_combined_segmentation_nb_lhnb table consolidates normal bail cases and legal hold normal bail cases into a single dataset. It creates a unified view of bail case classifications. Key attributes include case numbers and corresponding bail types, providing a structured dataset for further analysis and decision-making."
    },
    ### Silver tables ###
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "silver_sbail_m1_case_details",
        "Stage_Name": "silver_stage",
        "description": "The silver_sbail_m1_case_details table refines and enriches bail case details by integrating segmentation data and bronze-level case records. It links bail cases with their segmentation type (Normal Bail, Legal Hold, or Scottish Bail) while applying meaningful descriptions to key categorical fields, such as Bail Type, Court Preference, Interpreter Requirement, Cost Award Types, and Paying Party Details. The result is a well-structured dataset that enables clear classification and analysis of bail cases based on their characteristics and financial/legal implications."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "silver_sbail_m2_case_appellant",
        "Stage_Name": "silver_stage",
        "description": "The silver_sbail_m2_case_appellant table refines and enriches appellant-related details in bail cases by integrating segmentation data and bronze-level case appellant records. It links appellants to their bail case type while applying meaningful descriptions to the detention status of appellants. The result is a structured dataset that facilitates better analysis of appellant demographics, detention conditions, and case classifications within the bail system."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "silver_sbail_m3_hearing_details",
        "Stage_Name": "silver_stage",
        "description": "The silver_sbail_m3_hearing_details table refines and consolidates hearing-related details for bail cases by grouping and aggregating key court and hearing attributes. It integrates bronze-level hearing records and bail segmentation data, providing structured insights into case status, hearing schedules, court locations, and adjudicator assignments. The table distinguishes between the presiding judge (JudgeFT) and court clerk/usher (CourtClerkUsher) based on their role in the proceedings. This dataset enables efficient tracking of hearing events, adjudicator assignments, and court proceedings within bail case management."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "silver_sbail_m4_bf_diary",
        "Stage_Name": "silver_stage",
        "description": "The silver_sbail_m4_bf_diary table refines and consolidates BF (Bring Forward) diary records related to bail cases. It integrates bronze-level BF diary data with bail segmentation classifications, ensuring that each case is categorized under Normal Bail, Legal Hold, or Scottish Bail Funds. This table provides structured insights into BF dates, diary entries, completion statuses, and BF types, enabling effective tracking of pending actions and case follow-ups within the bail process."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "silver_sbail_m5_history",
        "Stage_Name": "silver_stage",
        "description": "The silver_sbail_m5_history table consolidates historical actions and user activity related to bail cases. It integrates bronze-level case history records with bail segmentation classifications, ensuring that each case is categorized under Normal Bail and Legal Hold. Key attributes include historical changes, timestamps, action types, user activity, and related case updates, providing a transparent and structured audit trail for case modifications and decision-making processes."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "silver_sbail_m6_link",
        "Stage_Name": "silver_stage",
        "description": "The silver_sbail_m6_link table consolidates case linkages and related details for bail cases. It integrates bronze-level case link data with bail segmentation classifications, ensuring that each case is categorized under Normal Bail or Legal Hold. Key attributes include case number, link number, link detail comments, and a concatenated full name (title, forenames, and name), providing a structured overview of case relationships and associated appellants for efficient case tracking and analysis."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "silver_sbail_m7_status",
        "Stage_Name": "silver_stage",
        "description": "The silver_sbail_m7_status table refines bail status details by integrating bronze-level status records with bail segmentation classifications, ensuring each case is categorized under Normal Bail or Legal Hold. It enhances categorical fields by providing descriptive labels for bail conditions, interpreter requirements, residence orders, reporting orders, and status parties. This structured dataset facilitates clearer analysis of case statuses, bail conditions, and legal requirements for improved decision-making and case management."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "silver_sbail_m8",
        "Stage_Name": "silver_stage",
        "description": "The silver_sbail_m8 table refines appeal category details by integrating bronze-level appeal category records with bail segmentation classifications, ensuring each case is categorized under Normal Bail or Legal Hold. It provides structured insights into case numbers, appeal categories, and associated classifications, enabling clearer tracking and analysis of case types and their legal classifications within the bail system."
    },
    {
        "Unique_Identifier_cols": "client_identifier",
        "Table_Name": "silver_sbail_meta_data",
        "Stage_Name": "silver_stage",
        "description": "The silver_sbail_meta_data table consolidates key metadata from bail case details and appellant records, creating a structured dataset for tracking and categorizing bail cases. It assigns client identifiers, event dates, region, publisher, and record classifications while standardizing date formats and ensuring each case is assigned to a batch ID for processing. The table provides essential attributes to create the manifest file."
    },
    ### Staging tables ###
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "stg_m1_m2",
        "Stage_Name": "staging_stage",
        "description": "The stg_m1_m2 table combines data from silver-level m1 and m2 tables, providing a consolidated table with distinct CaseNo."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "stg_m3_m7",
        "Stage_Name": "staging_stage",
        "description": "The stg_m3_m7 Delta Live Table joins adjudicator and hearing data from silver_sbail_m3_hearing_details with bail status information from silver_sbail_m7_status. It performs a pivot to group adjudicators by their position, creates a full name column for each adjudicator, and merges these with matching case and status IDs. The final output is a nested table grouped by CaseNo, where each case contains a list of all associated status records as structs."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "stg_m7_m3_statuses",
        "Stage_Name": "staging_stage",
        "description": "The stg_m3_m7 table combines adjudicator and hearing details from silver_sbail_m3_hearing_details with bail status records from silver_sbail_m7_status, creating a nested structure of all status entries per case. The stg_m7_m3_statuses table then enhances this by extracting, for each case, the highest StatusId (as MaxCaseStatusDescription) and the most relevant non-null LanguageDescription (as SecondaryLanguage), joining these back to provide enriched, case-level data."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "stg_m1_m2_m3_m7",
        "Stage_Name": "staging_stage",
        "description": "The stg_m1_m2_m3_m7 table merges core case details from stg_m1_m2 with enriched status and adjudicator information from stg_m7_m3_statuses using a left join on CaseNo, producing a unified view of case metadata, hearing details, and the latest status insights for downstream processing or reporting."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "stg_m1_m2_m3_m5_m7",
        "Stage_Name": "staging_stage",
        "description": "The stg_m1_m2_m3_m5_m7 table enriches the unified case view from stg_m1_m2_m3_m7 by joining it with historical activity data from silver_sbail_m5_history. It aggregates each case’s history entries into a structured list (m5_history_details) and extracts key insights, including the latest document comment for HistType = 16 (last_document) and the most recent file location for HistType = 6 (file_location), providing a comprehensive case-level dataset for downstream use."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "stg_m1_m2_m3_m4_m5_m7",
        "Stage_Name": "staging_stage",
        "description": "The stg_m1_m2_m3_m4_m5_m7 table builds on the enriched case data from stg_m1_m2_m3_m5_m7 by incorporating BF diary information from silver_sbail_m4_bf_diary. For each case, it aggregates BF entries—such as BF date, type, entry text, and completion date—into a nested list called bfdiary_details, creating a comprehensive view that includes case metadata, statuses, adjudication, history, and diary actions."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "stg_m1_m2_m3_m4_m5_m7_m8",
        "Stage_Name": "staging_stage",
        "description": "The stg_m1_m2_m3_m4_m5_m7_m8 table extends the consolidated case dataset by joining in appeal category data from silver_sbail_m8. For each case, it aggregates related appeal categories and flags into a nested list called appeal_category_details, resulting in a rich, case-centric view that combines core case details, adjudicator and status insights, history records, BF diary entries, and appeal classification metadata."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "stg_m1_m2_m3_m4_m5_m6_m7_m8",
        "Stage_Name": "staging_stage",
        "description": "The stg_m1_m2_m3_m4_m5_m6_m7_m8 table completes the comprehensive case view by integrating linked file information from silver_sbail_m6_link. It aggregates each case’s linked documents—such as comments, link numbers, and full names—into a nested linked_files_details column. This final table delivers an all-encompassing dataset with case metadata, adjudications, statuses, history, BF diary entries, appeal categories, and associated linked files for downstream consumption."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "stg_m1_m2_m3_m4_m5_m6_m7_m8_cs",
        "Stage_Name": "staging_stage",
        "description": "The stg_m1_m2_m3_m4_m5_m6_m7_m8_cs table extends the full case view by joining in financial surety details from bronze_case_surety_query. For each case, it aggregates financial condition entries—including sponsor name, address, contact details, solicitor, and security amounts—into a nested financial_condition_details list. This final enriched dataset supports dynamic HTML generation using mappings like case_surety_replacement, enabling accurate population of financial guarantee information for each case."
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "final_staging_sbails",
        "Stage_Name": "staging_stage",
        "description": "The final_staging_sbails table represents the complete, enriched bail case dataset by joining the comprehensive case view from stg_m1_m2_m3_m4_m5_m6_m7_m8_cs with linked cost award case data from linked_cases_cost_award. For each case, it aggregates associated cost award records—such as applicant and paying parties, award type, decisions, and appeal outcomes—into a nested list called linked_cases_aggregated, creating a unified structure ready for downstream analytics, reporting, or templated document generation."
    },
    ### Gold tables ###
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "create_sbails_html_content",
        "Stage_Name": "gold_stage",
        "description": "The create_sbails_html_content table generates HTML content for bail cases, transforming structured case data into formatted HTML outputs. It reads from final_staging_sbails, processes the data using the create_html function, and assigns a unique file name for each case, ensuring proper formatting by replacing special characters in case numbers. This table supports automated document generation for bail case summaries, facilitating downstream reporting, storage, and retrieval.",
        "File_name": True,
        "status": True
    },
    {
        "Unique_Identifier_cols": "CaseNo",
        "Table_Name": "create_sbails_json_content",
        "Stage_Name": "gold_stage",
        "description": "The create_sbails_json_content table generates JSON content for bail cases, transforming structured case data into serialized JSON format. It reads from final_staging_sbails, converts all fields into a nested JSON structure, and assigns a unique file name for each case by replacing special characters in case numbers. This table facilitates automated JSON document generation, enabling structured data storage, retrieval, and downstream processing for bail case management and reporting.",
        "File_name": True,
        "status": True
    },
    {
        # NOTE(review): this description is identical to the one used for
        # create_sbails_json_content and names that table, not this one.
        # Left unchanged; it needs a proper A360 description from the owner.
        "Unique_Identifier_cols": "client_identifier",
        "Table_Name": "create_sbails_a360_content",
        "Stage_Name": "gold_stage",
        "description": "The create_sbails_json_content table generates JSON content for bail cases, transforming structured case data into serialized JSON format. It reads from final_staging_sbails, converts all fields into a nested JSON structure, and assigns a unique file name for each case by replacing special characters in case numbers. This table facilitates automated JSON document generation, enabling structured data storage, retrieval, and downstream processing for bail case management and reporting.",
        "status": True
    },
    {
        "Unique_Identifier_cols": "client_identifier",
        "Table_Name": "gold_sbails_HTML_JSON_a360",
        "Stage_Name": "gold_stage",
        "description": "The gold_sbails_HTML_JSON_a360 table consolidates structured bail case content, integrating HTML, JSON, and A360 formats for A360 processing. It ensures data integrity by enforcing quality checks to exclude records containing errors in any content format and organizes cases into batches for streamlined processing.",
        "batchid": True,
    },
    {
        "Unique_Identifier_cols": "HTMLFileName",
        "Table_Name": "save_html_to_blob",
        "Stage_Name": "gold_stage",
        "description": "The save_html_to_blob table is responsible for uploading generated HTML content for bail cases to blob storage. It reads from create_sbails_html_content, repartitions the data for optimized parallel processing, and applies the upload_to_blob_udf function to transfer each HTML file to blob storage. A new column, HTMLTransferStatus, is added to track the status of each upload, ensuring visibility into successful and failed transfers. This table facilitates automated storage and retrieval of bail case HTML documents for further processing and access.",
        "status": True
    },
    {
        "Unique_Identifier_cols": "JSONFileName",
        "Table_Name": "save_json_to_blob",
        "Stage_Name": "gold_stage",
        "description": "The save_json_to_blob table is responsible for uploading generated JSON content for bail cases to blob storage. It reads from create_sbails_json_content, repartitions the data for optimized parallel processing, and applies the upload_to_blob_udf function to transfer each JSON file to blob storage. A new column, JSONTransferStatus, is added to track the status of each upload, ensuring visibility into successful and failed transfers. This table facilitates automated storage and retrieval of bail case JSON documents for further processing and access.",
        "status": True
    },
    {
        # NOTE(review): the description refers to "save_a360_to_blob" while the
        # audited table is gold_sbails_a360 - confirm which name is current.
        "Unique_Identifier_cols": "batchid",
        "Table_Name": "gold_sbails_a360",
        "Stage_Name": "gold_stage",
        "description": "The save_a360_to_blob table is responsible for uploading generated A360 content for bail cases to blob storage. It reads from create_sbails_a360_content, repartitions the data for optimized parallel processing, and applies the upload_to_blob_udf function to transfer each A360 file to blob storage. A new column, A360TransferStatus, is added to track the status of each upload, ensuring visibility into successful and failed transfers. This table facilitates automated storage and retrieval of bail case A360 documents for further processing and access.",
        "File_name": True,
        "status": True
    },
]

In [0]:
# Write audit rows for every configured table. Failures are best-effort: one
# table failing must not stop the remaining tables from being audited, so the
# error is reported together with the table name and the loop carries on.
failed_tables = []

for params in audit_params:
    unique_identifier_cols = params["Unique_Identifier_cols"]
    table_name = params["Table_Name"]
    stage_name = params["Stage_Name"]
    description = params["description"]
    # Optional flags default to False when an entry does not set them.
    File_name = params.get("File_name", False)
    status = params.get("status", False)
    batchid = params.get("batchid", False)

    try:
        df_loaded = spark.read.table(f"hive_metastore.{hive_schema}.{table_name}")
        create_audit_df(
            df_loaded,
            unique_identifier_cols,
            table_name,
            stage_name,
            description,
            File_Name=File_name,
            status=status,
            batchid=batchid,
        )

    except Exception as e:
        failed_tables.append(table_name)
        print(f"Error auditing table '{table_name}': {e}")

if failed_tables:
    print(f"Audit failed for {len(failed_tables)} table(s): {', '.join(failed_tables)}")


In [0]:
# Display the audit table location as the cell output.
audit_delta_path

In [0]:
# Load the audit Delta table and display its contents for a visual check.
df = spark.read.format("delta").load(audit_delta_path)

df.display()

In [0]:
# List the distinct table names that have rows in the audit table.
df.select("Table_Name").distinct().display()

In [0]:
# NOTE(review): this cell repeats the previous cell verbatim; it can likely be removed.
df.select("Table_Name").distinct().display()