In [0]:
%pip install /dbfs/FileStore/packages/shared_functions-0.6.6-py3-none-any.whl
dbutils.library.restartPython()

# Active Appeals Ended State (Gold)
<table style='float:left;'>
   <tbody>
      <tr>
         <td style='text-align: left;'><b>Name: </b></td>
         <td>GOLD_ACTIVE_APPEALS_ENDED</td>
      </tr>
      <tr>
         <td style='text-align: left;'><b>Description: </b></td>
         <td>Notebook to load data for the Ended state based on given mappings.</td>
      </tr>
      <tr>
         <td style='text-align: left;'><b>First Created: </b></td>
         <td>Feb-2026</td>
      </tr>
      <tr>
         <th style='text-align: left;'><b>Changelog (JIRA ref/initials/date):</b></th>
         <th>Comments</th>
      </tr>
      <tr>
         <td style='text-align: left;'><a href="https://tools.hmcts.net/jira/browse/ARIADM-1481">ARIADM-1481</a>/MU/JAN-2026</td>
         <td>Implementing mappings for the Ended state.</td>
      </tr>
   </tbody>
</table>

In [0]:
import importlib
import shared_functions.paymentPending as PP
import shared_functions.appealSubmitted as APS
import shared_functions.AwaitingEvidenceRespondant_a as AERa
import shared_functions.listing as L
import shared_functions.listing_dq_rules as L_DQRules
import shared_functions.prepareForHearing as PFH
import shared_functions.prepareforhearing_dq_rules as PFH_DQRules
import shared_functions.decision as D
import shared_functions.decision_dq_rules as D_DQRules
import shared_functions.decided_a as DA
import shared_functions.decided_a_dq_rules as DA_DQRules
import shared_functions.ftpa_submitted_a as FSA
import shared_functions.ftpa_submitted_a_dq_rules as FSA_DQRules
import shared_functions.ended as E
import shared_functions.ended_dq_rules as E_DQRules
from shared_functions.DQRules import base_DQRules, build_rule_expression

In [0]:
import json
from datetime import datetime
from pyspark.sql.functions import *
from pyspark.sql import functions as F
import os

In [0]:
# Load the environment configuration once. `first()` is a Spark action, so the row
# is fetched a single time and reused for both keys instead of re-reading the file.
config = spark.read.option("multiline", "true").json("dbfs:/configs/config.json")
config_row = config.first()
if config_row is None:
    # Fail with a clear message rather than "'NoneType' object is not subscriptable".
    raise ValueError("dbfs:/configs/config.json contains no configuration rows")

env_name = config_row["env"].strip().lower()
lz_key = config_row["lz_key"].strip().lower()

print(f"env_code: {lz_key}")  # This won't be redacted
print(f"env_name: {env_name}")  # This won't be redacted

# Key Vault scope name is derived from the landing-zone key and environment name.
KeyVault_name = f"ingest{lz_key}-meta002-{env_name}"
print(f"KeyVault_name: {KeyVault_name}") 

In [0]:
# Service principal credentials (read from the Key Vault-backed secret scope)
client_id = dbutils.secrets.get(KeyVault_name, "SERVICE-PRINCIPLE-CLIENT-ID")
client_secret = dbutils.secrets.get(KeyVault_name, "SERVICE-PRINCIPLE-CLIENT-SECRET")
tenant_id = dbutils.secrets.get(KeyVault_name, "SERVICE-PRINCIPLE-TENANT-ID")

# Storage account names
curated_storage = f"ingest{lz_key}curated{env_name}"
checkpoint_storage = f"ingest{lz_key}xcutting{env_name}"
raw_storage = f"ingest{lz_key}raw{env_name}"
landing_storage = f"ingest{lz_key}landing{env_name}"
external_storage = f"ingest{lz_key}external{env_name}"

# Apply the same OAuth client-credentials settings to every storage account.
# The accounts are configured in the original order: curated, checkpoint
# (cross-cutting), raw, landing, external.
oauth_token_endpoint = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
for storage_account in (
    curated_storage,
    checkpoint_storage,
    raw_storage,
    landing_storage,
    external_storage,
):
    spark.conf.set(f"fs.azure.account.auth.type.{storage_account}.dfs.core.windows.net", "OAuth")
    spark.conf.set(f"fs.azure.account.oauth.provider.type.{storage_account}.dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider")
    spark.conf.set(f"fs.azure.account.oauth2.client.id.{storage_account}.dfs.core.windows.net", client_id)
    spark.conf.set(f"fs.azure.account.oauth2.client.secret.{storage_account}.dfs.core.windows.net", client_secret)
    spark.conf.set(f"fs.azure.account.oauth2.client.endpoint.{storage_account}.dfs.core.windows.net", oauth_token_endpoint)

In [0]:
# Target state handled by this notebook and the suffix used in DLT table names.
AppealState = "ended"
output_name = "ended"

# Locations used by the cells below. The gold output prefix is relative to the
# "gold" container; the others are absolute ABFS URIs on the curated account.
gold_outputs = f"ARIADM/ACTIVE/CCD/APPEALS/{AppealState}"
audit_path = f"abfss://silver@ingest{lz_key}curated{env_name}.dfs.core.windows.net/ARIADM/ACTIVE/CCD/APPEALS/AUDIT/{AppealState}"
silver_path = f"abfss://silver@ingest{lz_key}curated{env_name}.dfs.core.windows.net/ARIADM/ACTIVE/CCD/APPEALS/"
bronze_path = f"abfss://bronze@ingest{lz_key}curated{env_name}.dfs.core.windows.net/ARIADM/ACTIVE/CCD/APPEALS/"

# Show the resolved settings so a run can be checked at a glance.
variables = dict(
    bronze_path=bronze_path,
    silver_path=silver_path,
    audit_path=audit_path,
    gold_outputs=gold_outputs,
    key_vault=KeyVault_name,
    AppealState=AppealState,
)

display(variables)

In [0]:
from pyspark.sql.functions import col, lit

def _ended_rows(table_name):
    """Return the rows of `table_name` whose dv_targetState equals AppealState."""
    return spark.table(table_name).filter(col("dv_targetState") == lit(AppealState))


# Silver tables restricted to the target state (M1 is additionally de-duplicated).
silver_m1 = _ended_rows("hive_metastore.ariadm_active_appeals.silver_appealcase_detail").distinct()
silver_m3 = _ended_rows("hive_metastore.ariadm_active_appeals.silver_status_detail")
silver_m4 = _ended_rows("hive_metastore.ariadm_active_appeals.silver_transaction_detail")
silver_m5 = _ended_rows("hive_metastore.ariadm_active_appeals.silver_link_detail")
silver_m6 = _ended_rows("hive_metastore.ariadm_active_appeals.silver_adjudicator_detail")
silver_h = _ended_rows("hive_metastore.ariadm_active_appeals.silver_history_detail")

# Silver tables read in full (no target-state filter is applied in this cell).
silver_m2 = spark.table("hive_metastore.ariadm_active_appeals.silver_caseapplicant_detail")
silver_c = spark.table("hive_metastore.ariadm_active_appeals.silver_appealcategory_detail")

# Bronze lookup tables.
bronze_remissions_lookup_df = spark.table("hive_metastore.ariadm_active_appeals.bronze_remissions").distinct()
bronze_countryFromAddress = spark.table("hive_metastore.ariadm_active_appeals.bronze_countries_countryFromAddress")
bronze_HORef_cleansing = spark.table("hive_metastore.ariadm_active_appeals.bronze_HORef_cleansing")
bronze_hearing_centres = spark.table("hive_metastore.ariadm_active_appeals.bronze_hearing_centres")
bronze_derive_hearing_centres = spark.table("hive_metastore.ariadm_active_appeals.bronze_derive_hearing_centres")
bronze_interpreter_languages = spark.table("hive_metastore.ariadm_active_appeals.bronze_interpreter_languages")
bronze_listing_location = spark.table("hive_metastore.ariadm_active_appeals.bronze_listing_location")
bronze_ended_states = spark.table("hive_metastore.ariadm_active_appeals.bronze_ended_states")

In [0]:
# Exploratory run of PP.appealType on the ended-state M1 rows.
# `df` / `df_audit` are scratch names that every cell in this section reassigns.
df, df_audit = PP.appealType(silver_m1)
# display(df)

In [0]:
# Exploratory run of PP.caseData; overwrites the scratch `df` / `df_audit` pair.
df, df_audit = PP.caseData(silver_m1, silver_m2, silver_m3, silver_h, bronze_hearing_centres, bronze_derive_hearing_centres)
# display(df)

In [0]:
# Exploratory run of PP.flagsLabels; overwrites the scratch `df` / `df_audit` pair.
df, df_audit = PP.flagsLabels(silver_m1, silver_m2, silver_c)
# display(df)

In [0]:
# Exploratory run of PP.legalRepDetails using the country-from-address lookup.
df, df_audit = PP.legalRepDetails(silver_m1,bronze_countryFromAddress)
# display(df)

In [0]:
# Exploratory run of AERa.appellantDetails (the AwaitingEvidenceRespondant_a variant).
df, df_audit = AERa.appellantDetails(silver_m1, silver_m2, silver_c, bronze_countryFromAddress,bronze_HORef_cleansing)
# display(df)

In [0]:
# Exploratory run of PP.homeOfficeDetails using the HORef cleansing lookup.
df, df_audit = PP.homeOfficeDetails(silver_m1, silver_m2, silver_c, bronze_HORef_cleansing)
# display(df)

In [0]:
# Exploratory run of APS.paymentType on M1 and the transaction table (silver_m4).
df, df_audit = APS.paymentType(silver_m1, silver_m4)
# display(df)

In [0]:
# Exploratory run of PP.partyID; overwrites the scratch `df` / `df_audit` pair.
df, df_audit = PP.partyID(silver_m1, silver_m3, silver_c)
# display(df)

In [0]:
# Exploratory run of APS.remissionTypes with the de-duplicated remissions lookup.
df, df_audit = APS.remissionTypes(silver_m1, bronze_remissions_lookup_df, silver_m4)
# display(df)

In [0]:
# Exploratory run of PP.sponsorDetails; overwrites the scratch `df` / `df_audit` pair.
df, df_audit = PP.sponsorDetails(silver_m1, silver_c)
# display(df)

In [0]:
# Exploratory run of the ended-module hearingRequirements (E = shared_functions.ended).
df, df_audit = E.hearingRequirements(silver_m1, silver_m3, silver_c, bronze_interpreter_languages)
# display(df)

In [0]:
# Exploratory run of the ended-module general mapping.
df, df_audit = E.general(silver_m1, silver_m2, silver_m3, silver_h, bronze_hearing_centres, bronze_derive_hearing_centres)
# display(df)

In [0]:
# Exploratory run of E.generalDefault. Unlike the other helpers in this section,
# the result is bound to a single name (no audit frame is unpacked).
df = E.generalDefault(silver_m1,silver_m3)
# display(df)

In [0]:
# Development aid: re-import shared_functions.ended (alias E) so that edits to the
# module are picked up without restarting Python.
import importlib
importlib.reload(E)

In [0]:
# Exploratory run of E.documents. This is the only cell in the section that still
# renders output: it displays the audit frame rather than `df`.
df, df_audit = E.documents(silver_m1,silver_m3)
display(df_audit)

In [0]:
# Exploratory run of the ended-module ftpa mapping on the status and category tables.
df,df_audit = E.ftpa(silver_m3, silver_c)
# display(df)

In [0]:
# Exploratory run of E.hearingActuals, which takes only the status table.
df, df_audit = E.hearingActuals(silver_m3)
# display(df)

In [0]:
# Exploratory run of E.substantiveDecision; overwrites the scratch `df` / `df_audit` pair.
df, df_audit = E.substantiveDecision(silver_m1,silver_m3)
# display(df)

In [0]:
# Exploratory run of E.hearingResponse, which also needs the adjudicator table (silver_m6).
df, df_audit = E.hearingResponse(silver_m1, silver_m3, silver_m6)
# display(df)

In [0]:
# Exploratory run of E.hearingDetails using the listing-location lookup.
df, df_audit = E.hearingDetails(silver_m1,silver_m3,bronze_listing_location)
# display(df)

In [0]:
# Exploratory run of E.ended using the bronze_ended_states lookup.
df, df_audit = E.ended(silver_m3,bronze_ended_states)
# df.display()

In [0]:
# Exploratory run of PP.caseState. The literal "ended" is the same value that the
# configuration cell assigns to AppealState.
df, df_audit = PP.caseState(silver_m1, "ended")
# display(df)

In [0]:
# Development aid (repeated): reload shared_functions.ended (alias E) before the
# definitions below so they use the latest version of the module.
import importlib
importlib.reload(E)

In [0]:
from pyspark.sql.types import StringType, StructType, ArrayType, MapType
from pyspark.sql.functions import col, lit, to_json, struct, concat, regexp_replace
from datetime import datetime

def mainEnded(silver_segmentation, silver_m1, silver_m2, silver_m3, silver_m6, silver_c, silver_h, bronze_remissions, bronze_countryFromAddress, bronze_HORef_cleansing, bronze_hearing_centres, bronze_derive_hearing_centres, bronze_interpreter_languages, bronze_listing_location,bronze_ended_states, silver_m4=None):
    """Build the per-case JSON payload and the audit frame for the "ended" state.

    Each mapping helper is called on the supplied frames and its output is
    left-joined onto ``silver_segmentation`` by ``CaseNo``.

    Args:
        silver_segmentation: driving frame; one left join per mapping is applied to it.
        silver_m1, silver_m2, silver_m3, silver_m6, silver_c, silver_h: silver
            frames forwarded to the mapping helpers.
        bronze_remissions, bronze_countryFromAddress, bronze_HORef_cleansing,
        bronze_hearing_centres, bronze_derive_hearing_centres,
        bronze_interpreter_languages, bronze_listing_location,
        bronze_ended_states: bronze lookup frames forwarded to the helpers.
        silver_m4: transaction frame used by the payment and remission mappings.
            Optional and last so that existing positional callers keep working.
            When omitted, it is loaded from the hive_metastore transaction table
            filtered to the "ended" target state.

    Returns:
        ``(df_final, df_combined_audit)``. ``df_final`` is the combined frame plus
        ``JSON_Content`` (a JSON struct of every column except ``CaseNo``) and
        ``JSON_File_name`` (blob path built from the module-level ``gold_outputs``
        and a run timestamp). ``df_combined_audit`` is the joined audit frames.
    """
    AppealState = "ended"

    # Previously `silver_m4` was resolved as a notebook global left behind by the
    # exploratory table-load cell. Load it explicitly when the caller does not
    # supply it, so the function no longer depends on that cell having run.
    if silver_m4 is None:
        silver_m4 = (
            spark.table("hive_metastore.ariadm_active_appeals.silver_transaction_detail")
            .filter(col("dv_targetState") == lit(AppealState))
        )

    # Aggregate details
    # NOTE(review): the exploratory cells earlier in this notebook call the E.*
    # (shared_functions.ended) versions of general / generalDefault / documents /
    # hearingResponse / hearingDetails / hearingRequirements / substantiveDecision /
    # hearingActuals / ftpa, whereas this builder uses the FSA / PFH / D / L / DA
    # versions. Confirm which set is intended for the ended state.
    appealType_df, appealType_df_audit = PP.appealType(silver_m1)
    caseData_df, caseData_df_audit = PP.caseData(silver_m1, silver_m2, silver_m3, silver_h, bronze_hearing_centres, bronze_derive_hearing_centres)
    flagsLabels_df, flagsLabels_df_audit = PP.flagsLabels(silver_m1, silver_m2, silver_c)
    appellantDetails_df, appellantDetails_df_audit = AERa.appellantDetails(silver_m1, silver_m2, silver_c, bronze_countryFromAddress,bronze_HORef_cleansing)
    legalRepDetails_df, legalRepDetails_df_audit = PP.legalRepDetails(silver_m1,bronze_countryFromAddress)
    partyID_df, partyID_df_audit = PP.partyID(silver_m1, silver_m3, silver_c)
    payment_df, payment_df_audit = APS.paymentType(silver_m1, silver_m4)
    homeOfficeDetails_df, homeOfficeDetails_df_audit = PP.homeOfficeDetails(silver_m1, silver_m2, silver_c, bronze_HORef_cleansing)
    remissionTypes_df, remissionTypes_df_audit = APS.remissionTypes(silver_m1, bronze_remissions, silver_m4)
    sponsorDetails_df, sponsorDetails_df_audit = PP.sponsorDetails(silver_m1, silver_c)
    general_df, general_df_audit = FSA.general(silver_m1, silver_m2, silver_m3, silver_h, bronze_hearing_centres, bronze_derive_hearing_centres)
    generalDefault_df = FSA.generalDefault(silver_m1)
    documents_df, documents_df_audit = FSA.documents(silver_m1,silver_m3)
    hearingResponse_df, hearingResponse_df_audit = PFH.hearingResponse(silver_m1, silver_m3, silver_m6)
    hearingDetails_df, hearingDetails_df_audit = D.hearingDetails(silver_m1,silver_m3,bronze_listing_location)
    caseState_df, caseState_df_audit = PP.caseState(silver_m1, AppealState)
    hearingRequirements_df, hearingRequirements_df_audit = L.hearingRequirements(silver_m1, silver_m3, silver_c, bronze_interpreter_languages)
    substantiveDecision_df, substantiveDecision_df_audit = DA.substantiveDecision(silver_m1,silver_m3)
    hearingActuals_df, hearingActuals_df_audit = DA.hearingActuals(silver_m3)
    ftpa_df, ftpa_df_audit = FSA.ftpa(silver_m3,silver_c)
    ended_df, ended_df_audit = E.ended(silver_m3,bronze_ended_states)

    silver_segmentation_df = silver_segmentation

    # Join order is significant: it fixes the column order of the combined frame
    # and therefore the field order of the generated JSON.
    data_frames = [
        appealType_df,
        caseData_df,
        flagsLabels_df,
        appellantDetails_df,
        legalRepDetails_df,
        sponsorDetails_df,
        partyID_df,
        payment_df,
        remissionTypes_df,
        homeOfficeDetails_df,
        caseState_df,
        hearingRequirements_df,
        general_df,
        generalDefault_df,
        documents_df,
        hearingResponse_df,
        hearingDetails_df,
        substantiveDecision_df,
        hearingActuals_df,
        ftpa_df,
        ended_df,
    ]
    # generalDefault returns no audit frame, so it has no entry here.
    audit_frames = [
        appealType_df_audit,
        caseData_df_audit,
        flagsLabels_df_audit,
        appellantDetails_df_audit,
        legalRepDetails_df_audit,
        sponsorDetails_df_audit,
        partyID_df_audit,
        payment_df_audit,
        remissionTypes_df_audit,
        homeOfficeDetails_df_audit,
        caseState_df_audit,
        hearingRequirements_df_audit,
        general_df_audit,
        documents_df_audit,
        hearingResponse_df_audit,
        hearingDetails_df_audit,
        substantiveDecision_df_audit,
        hearingActuals_df_audit,
        ftpa_df_audit,
        ended_df_audit,
    ]

    # Join all aggregated data onto the segmentation frame
    df_combined = silver_segmentation_df
    for data_frame in data_frames:
        df_combined = df_combined.join(data_frame, on="CaseNo", how="left")

    # Join all audit data onto the segmentation frame
    df_combined_audit = silver_segmentation_df
    for audit_frame in audit_frames:
        df_combined_audit = df_combined_audit.join(audit_frame, on="CaseNo", how="left")

    # Run timestamp used as a folder name in the output path.
    Datetime_name = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

    # Create the JSON payload (all columns except CaseNo) and the target file name;
    # "/" in CaseNo is replaced with "_" so it can be used in the blob name.
    df_final = df_combined.withColumn(
        "JSON_Content", to_json(struct(*df_combined.drop(col("CaseNo")).columns))
    ).withColumn(
        "JSON_File_name", concat(lit(f"{gold_outputs}/{Datetime_name}/JSON/APPEALS_"), regexp_replace(col("CaseNo"), "/", "_"), lit(".json"))
    )
    
    return df_final, df_combined_audit

########### Test ##########

# silver_m1 = spark.table("ariadm_active_appeals.silver_appealcase_detail").filter(col("dv_targetState") == lit(AppealState)).distinct()
# silver_m2 =  spark.table("ariadm_active_appeals.silver_caseapplicant_detail").filter(col("dv_targetState") == lit(AppealState))
# silver_m3 = spark.table("ariadm_active_appeals.silver_status_detail").filter(col("dv_targetState") == lit(AppealState))
# silver_c = spark.table("ariadm_active_appeals.silver_appealcategory_detail").filter(col("dv_targetState") == lit(AppealState))
# bronze_remissions = spark.table("ariadm_active_appeals.bronze_remissions").distinct()
# silver_segmentation = spark.table("ariadm_active_appeals.stg_segmentation_states").filter(col("TargetState") == lit(AppealState))

# bronze_countryFromAddress = spark.table("ariadm_active_appeals.bronze_countries_countryFromAddress").withColumn("lu_countryGovUkOocAdminJ", col("countryGovUkOocAdminJ"))

# bronze_HORef_cleansing = spark.table("ariadm_active_appeals.bronze_HORef_cleansing")
# bronze_interpreter_languages = spark.table("ariadm_active_appeals.bronze_interpreter_languages")

# df_final, df_audit = mainEnded(silver_segmentation, silver_m1, silver_m2, silver_m3, silver_m6, silver_c, silver_h, bronze_remissions, bronze_countryFromAddress, bronze_HORef_cleansing, bronze_hearing_centres, bronze_derive_hearing_centres, bronze_interpreter_languages,bronze_listing_location,bronze_ended_states)

# display(df_final)

### Functions: Blob Client Connection Configuration and Upload

In [0]:
# Fetch the curated-storage secret for this environment from the Key Vault scope.
# NOTE(review): the secret is named "...-SAS-TOKEN" but is passed to
# BlobServiceClient.from_connection_string below — confirm it holds a full connection string.
secret = dbutils.secrets.get(KeyVault_name, f"CURATED-{env_name}-SAS-TOKEN")

In [0]:
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
import os

# Container that receives the generated JSON files.
container_name = "gold"

# Build the service client from the secret fetched above, then narrow it to the
# target container; `container_client` is what the upload function uses.
connection_string = secret
blob_service_client = BlobServiceClient.from_connection_string(connection_string)
container_client = blob_service_client.get_container_client(container_name)

In [0]:
# Upload a payload to Azure Blob Storage
def upload_to_blob(file_name, file_content):
    """Write ``file_content`` to the blob named ``file_name`` in ``container_client``.

    An existing blob with the same name is overwritten. The function never
    raises: it returns "success", or "error: <message>" if anything failed, so
    the outcome can be stored in a status column.
    """
    status = "success"
    try:
        target_blob = container_client.get_blob_client(f"{file_name}")
        target_blob.upload_blob(file_content, overwrite=True)
    except Exception as exc:
        status = f"error: {str(exc)}"
    return status

# Register the upload function as a UDF
# `udf` comes from the wildcard pyspark.sql.functions import. No returnType is
# passed, so PySpark's default (string) applies, matching the status text that
# upload_to_blob returns.
upload_udf = udf(upload_to_blob)

# df_with_upload_status = df_final.withColumn(
#     "Status", upload_udf(col("JSON_File_name"), col("JSON_Content"))
# )

# display(df_with_upload_status)


## Gold Outputs and Tracking DLT Table Creation

In [0]:
# Assemble the data-quality rules: start from the base rules, then let each state
# module add its own checks. The modules are applied in the original order.
checks = base_DQRules()
for dq_rule_module in (
    L_DQRules,
    PFH_DQRules,
    D_DQRules,
    DA_DQRules,
    FSA_DQRules,
    E_DQRules,
):
    checks = dq_rule_module.add_checks(checks)

# Single SQL expression built from all checks; used to derive the `is_valid` flag.
dq_rules = build_rule_expression(checks)

In [0]:
import dlt
from pyspark.sql.functions import col, lit, expr
from pyspark.sql import Window

@dlt.table(
    name=f"stg_main_{output_name}_validation",
    comment="DLT table running ended to generate a JSON_Content column for CCD validation. Applies DLT expectations on CCD, adding is_valid to flag validation results.",
    path=f"{audit_path}/stg_main_{output_name}_validation"
)
@dlt.expect_all(checks)
def stg_main_ended_validation():
    """Build the ended-state JSON rows and attach the columns the DQ rules inspect.

    Steps:
      1. Read the silver/bronze inputs via ``dlt.read``; if that fails, read the
         same tables from ``hive_metastore.ariadm_active_appeals`` instead.
      2. Call ``mainEnded`` to produce the JSON payload per case (its audit frame
         is not used here).
      3. Left-join a set of validation frames onto the payload by ``CaseNo``.
      4. Add ``is_valid`` by evaluating the module-level ``dq_rules`` expression.

    Returns:
        The payload frame with the validation columns and ``is_valid``.
    """
    try:
        silver_m1 = dlt.read("silver_appealcase_detail").filter(col("dv_targetState") == lit(AppealState)).distinct()
        silver_m2 = dlt.read("silver_caseapplicant_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_m3 = dlt.read("silver_status_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_m6 = dlt.read("silver_adjudicator_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_c = dlt.read("silver_appealcategory_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_h = dlt.read("silver_history_detail").filter(col("dv_targetState") == lit(AppealState))
        bronze_countries_postal_lookup_df = dlt.read("bronze_countries_postal").distinct()
        bronze_remissions = dlt.read("bronze_remissions").distinct()
        bronze_countryFromAddress = dlt.read("bronze_countries_countryFromAddress")
        bronze_HORef_cleansing = dlt.read("bronze_HORef_cleansing")
        bronze_hearing_centres = dlt.read("bronze_hearing_centres")
        bronze_derive_hearing_centres = dlt.read("bronze_derive_hearing_centres")
        bronze_interpreter_languages = dlt.read("bronze_interpreter_languages")
        # NOTE(review): these two use an unqualified spark.table rather than
        # dlt.read like the rest of this branch — confirm that is intentional.
        bronze_listing_location = spark.table("bronze_listing_location")
        bronze_ended_states = spark.table("bronze_ended_states")
        silver_segmentation = dlt.read("stg_segmentation_states").filter(col("TargetState") == lit(AppealState))
        
    except Exception as read_error:
        # Only ordinary errors trigger the fallback (a bare `except:` would also
        # swallow KeyboardInterrupt/SystemExit). The reason is logged so that an
        # unexpected fallback is visible in the driver log.
        print(f"stg_main_{output_name}_validation: pipeline read failed ({read_error!r}); falling back to hive_metastore tables")
        silver_m1 = spark.table("hive_metastore.ariadm_active_appeals.silver_appealcase_detail").filter(col("dv_targetState") == lit(AppealState)).distinct()
        silver_m2 = spark.table("hive_metastore.ariadm_active_appeals.silver_caseapplicant_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_m3 = spark.table("hive_metastore.ariadm_active_appeals.silver_status_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_m6 = spark.table("hive_metastore.ariadm_active_appeals.silver_adjudicator_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_c = spark.table("hive_metastore.ariadm_active_appeals.silver_appealcategory_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_h = spark.table("hive_metastore.ariadm_active_appeals.silver_history_detail").filter(col("dv_targetState") == lit(AppealState))
        bronze_countries_postal_lookup_df = spark.table("hive_metastore.ariadm_active_appeals.bronze_countries_postal").distinct() 
        bronze_remissions = spark.table("hive_metastore.ariadm_active_appeals.bronze_remissions").distinct()
        bronze_countryFromAddress = spark.table("hive_metastore.ariadm_active_appeals.bronze_countries_countryFromAddress")
        bronze_HORef_cleansing = spark.table("hive_metastore.ariadm_active_appeals.bronze_HORef_cleansing")
        bronze_hearing_centres = spark.table("hive_metastore.ariadm_active_appeals.bronze_hearing_centres")
        bronze_derive_hearing_centres = spark.table("hive_metastore.ariadm_active_appeals.bronze_derive_hearing_centres")
        bronze_interpreter_languages = spark.table("hive_metastore.ariadm_active_appeals.bronze_interpreter_languages")
        bronze_listing_location = spark.table("hive_metastore.ariadm_active_appeals.bronze_listing_location")
        bronze_ended_states = spark.table("hive_metastore.ariadm_active_appeals.bronze_ended_states")
        silver_segmentation = spark.table("hive_metastore.ariadm_active_appeals.stg_segmentation_states").filter(col("TargetState") == lit(AppealState))

    # JSON payload per case; the audit frame returned alongside it is not used here.
    df_final, df_audit = mainEnded(silver_segmentation, silver_m1, silver_m2, silver_m3, silver_m6, silver_c, silver_h, bronze_remissions, bronze_countryFromAddress, bronze_HORef_cleansing, bronze_hearing_centres, bronze_derive_hearing_centres, bronze_interpreter_languages,bronze_listing_location,bronze_ended_states)

    # ---- Validation frames joined onto the payload below ----
    valid_representation = silver_m1.select(col("CaseNo"), col("dv_representation"),col("dv_CCDAppealType"),col("CaseRep_Address5"), col("CaseRep_Postcode"),col("MainRespondentId"), col("lu_appealType"), col("HORef"),col("Sponsor_Authorisation"),col("Sponsor_Name"),col("RepresentativeId")) 
    # Filter on Relationship before projecting, because the projection does not keep
    # that column (same rows, without relying on Spark re-resolving a dropped column).
    valid_appealant_address = (
        silver_m2.filter(col("Relationship").isNull())
        .select(col("CaseNo"), col("Appellant_Address1"), col("Appellant_Address2"), col("Appellant_Address3"), col("Appellant_Address4"), col("Appellant_Address5"), col("Appellant_Postcode"), col("Appellant_Email"), col("Appellant_Telephone"), col("FCONumber"), col("Appellant_Name"))
    )
    valid_country_list = bronze_countries_postal_lookup_df.select(col("countryGovUkOocAdminJ").alias("valid_countryGovUkOocAdminJ")).distinct()
    valid_catagoryid_list = silver_c.groupBy("CaseNo").agg(F.collect_list("CategoryId").alias("valid_categoryIdList"))
    valid_HORef_cleansing = bronze_HORef_cleansing.select( col("CaseNo"),coalesce(col("HORef"), col("FCONumber")).alias("lu_HORef"))
    valid_reasonDescription = silver_m1.alias("m1").join(bronze_remissions, on=["PaymentRemissionReason","PaymentRemissionRequested"], how="left").select("CaseNo", "ReasonDescription",col("remissionClaim").alias("lu_remissionClaim"),col("feeRemissionType").alias("lu_feeRemissionType"))

    # Latest status row per case (highest StatusId), kept only when it matches one
    # of the CaseStatus/Outcome combinations below.
    window_spec = Window.partitionBy("CaseNo").orderBy(desc("StatusId"))
    valid_caseStatus = silver_m3.select(col("CaseNo"),col("CaseStatus"), col("StatusId"), col("Outcome"), col("AdditionalLanguageId"), row_number().over(window_spec).alias("rn")).filter(
        (col("rn") == 1) & (((col("CaseStatus").isin(37, 38)) & (col("Outcome").isin(0, 27, 37, 39, 40, 50))) | ((col("CaseStatus") == 26) & (col("Outcome").isin(40, 52))))
    ).drop("rn")
    valid_statusId = valid_caseStatus.select(col("CaseNo"),col("CaseStatus"), col("StatusId"), col("Outcome"))
    valid_hearing_requirements = silver_m1.select(col("CaseNo"), col("Interpreter"), col("CourtPreference"), col("InCamera"))

    # Interpreter-language lookups for the primary (M1) and additional (status row) language.
    valid_languages = (
        silver_m1.alias("m1")
            .join(valid_caseStatus.alias("m3"), on="CaseNo", how="left")
            .join(bronze_interpreter_languages.alias("lu_language"), on=(col("m1.LanguageId") == col("lu_language.LanguageId")), how="left")
            .join(bronze_interpreter_languages.alias("lu_additional_language"), on=(col("m3.AdditionalLanguageId") == col("lu_additional_language.LanguageId")), how="left")
            .select(
                col("CaseNo"),
                col("m1.LanguageId").alias("LanguageId"),
                col("m3.AdditionalLanguageId").alias("AdditionalLanguageId"),
                col("lu_language.appellantInterpreterLanguageCategory").alias("valid_languageCategory"),
                col("lu_additional_language.appellantInterpreterLanguageCategory").alias("valid_additionalLanguageCategory"),
                col("lu_language.languageCode").alias("valid_languageCode"),
                col("lu_additional_language.languageCode").alias("valid_additionalLanguageCode"),
                col("lu_language.languageLabel").alias("valid_languageLabel"),
                col("lu_additional_language.languageLabel").alias("valid_additionalLanguageLabel"),
                col("lu_language.manualEntry").alias("valid_manualEntry"),
                col("lu_additional_language.manualEntry").alias("valid_additionalManualEntry"),
                col("lu_language.manualEntryDescription").alias("valid_manualEntryDescription"),
                col("lu_additional_language.manualEntryDescription").alias("valid_additionalManualEntryDescription")
            )
    )

    # Latest CaseStatus 37/38 row per case, used for the hearing/listing columns.
    window_spec = Window.partitionBy("CaseNo").orderBy(col("StatusId").desc())
    silver_m3_filtered_casestatus = silver_m3.filter(col("CaseStatus").isin(37, 38))
    silver_m3_ranked = silver_m3_filtered_casestatus.withColumn("row_number", row_number().over(window_spec))
    silver_m3_filtered_casestatus = silver_m3_ranked.filter(col("row_number") == 1).drop("row_number")

    df_m3_validation = silver_m3_filtered_casestatus.select("CaseNo", "HearingCentre","TimeEstimate","HearingDate","HearingType","CourtName","ListType","ListTypeId","StartTime","Judge1FT_Surname","Judge2FT_Surname","Judge3FT_Surname","Judge1FT_Forenames","Judge2FT_Forenames","Judge3FT_Forenames","Judge1FT_Title","Judge2FT_Title","Judge3FT_Title","CourtClerk_Surname","CourtClerk_Forenames","CourtClerk_Title")
    df_m6_validation =silver_m6.drop("dv_targetState")
    df_m1_validation = silver_m1.select("CaseNo","VisitVisaType")
    df_ll_validation = bronze_listing_location.select(col("ListedCentre"),col("locationCode"),col("locationLabel"),col("listCaseHearingCentre").alias("bronz_listCaseHearingCentre"),col("listCaseHearingCentreAddress").alias("bronz_listCaseHearingCentreAddress"))
    valid_preparforhearing = df_m1_validation.join(df_m3_validation, on="CaseNo", how="left").join(df_ll_validation, on=col("HearingCentre") == col("ListedCentre"), how="left").join(df_m6_validation, on="CaseNo", how="left")

    valid_preparforhearing = valid_preparforhearing.drop("HearingCentre")
    # silver_c_filtered_CategoryId = silver_c.filter(col("CategoryId").isin(37, 38)).select(col("CaseNo"),col("CategoryId")).distinct()

    # Latest row per case with CaseStatus 37/38/26 and Outcome 1/2 (decision columns).
    silver_m3_filtered_outcome = silver_m3.filter(col("CaseStatus").isin(37, 38,26) & col("Outcome").isin(1,2))
    silver_m3_outcome_ranked = silver_m3_filtered_outcome.withColumn("row_number", row_number().over(window_spec))
    silver_m3_outcome_ranked = silver_m3_outcome_ranked.filter(col("row_number") == 1).select(col("CaseNo"), col("Outcome").alias("Outcome_SD")
                                ,col("HearingDuration"),col("Adj_Determination_Title"),col("Adj_Determination_Forenames")
                                ,col("Adj_Determination_Surname"),col("DecisionDate"))
    
    # Latest CaseStatus 39 row per case (FTPA columns).
    silver_m3_filtered_fpta = silver_m3.filter(col("CaseStatus").isin(39))
    silver_m3_ftpa_ranked = silver_m3_filtered_fpta.withColumn("row_number", row_number().over(window_spec))
    silver_m3_ftpa_ranked = silver_m3_ftpa_ranked.filter(col("row_number") == 1).select(col("CaseNo"),col("Party"),col("OutOfTime"),col("DateReceived"),
                                                                                        col("Adj_Title"),col("Adj_Forenames"),col("Adj_Surname")).distinct()

    # ---- Ended state columns ----

    # 1) Cast the status keys to numeric types so the comparisons below are numeric.
    df_ended_dq = (silver_m3.withColumn("CaseStatus", F.col("CaseStatus").cast("int")).withColumn("Outcome", F.col("Outcome").cast("int"))
        .withColumn("StatusId", F.col("StatusId").cast("long")))

    # 2) Build "exists in the same case" flag for sa.CaseStatus IN (10,51,52)
    has_10_51_52 = (df_ended_dq.groupBy("CaseNo").agg(F.max(F.when(F.col("CaseStatus").isin(10, 51, 52), F.lit(1)).otherwise(F.lit(0))).alias("has_10_51_52_in_case")))
    df_with_flag = df_ended_dq.join(has_10_51_52, on="CaseNo", how="left")

    # 3) Apply the full filter equivalent to your SQL WHERE
    cond = (((F.col("CaseStatus") == 10) & F.col("Outcome").isin(80, 122, 25, 120, 2, 105, 13)) |
        ((F.col("CaseStatus") == 46) & (F.col("Outcome") == 31) & (F.col("has_10_51_52_in_case") == 1)) |
        ( (F.col("CaseStatus") == 26) & F.col("Outcome").isin(80, 13, 25)) |
        (F.col("CaseStatus").isin(37, 38) & F.col("Outcome").isin(80, 13, 25, 72, 125)) |
        ((F.col("CaseStatus") == 39) & (F.col("Outcome") == 25)) |
        ((F.col("CaseStatus") == 51) & F.col("Outcome").isin(0, 94, 93)) |
        ((F.col("CaseStatus") == 52) & F.col("Outcome").isin(91, 95)) |
        ((F.col("CaseStatus") == 36) & F.col("Outcome").isin(1, 2, 25))
    )

    filtered = df_with_flag.filter(cond)

    # 4) For each CaseNo, take the row with the MAX(StatusId) among the filtered rows
    w = Window.partitionBy("CaseNo").orderBy(F.col("StatusId").desc())
    m3_net_df = (filtered.withColumn("rn", F.row_number().over(w)).filter(F.col("rn") == 1).drop("rn", "has_10_51_52_in_case"))

    # 5) Build decision_ts robustly and format end date
    #    (default timestamp parse first, then two explicit ISO-8601 patterns; output dd/MM/yyyy).
    silver_with_decision_ts = m3_net_df.alias("m3").join(bronze_ended_states.alias("es"), on=["CaseStatus", "Outcome"], how="left")\
            .withColumn("decision_ts",F.date_format(F.coalesce(F.to_timestamp(F.col("DecisionDate")), 
            F.to_timestamp(F.col("DecisionDate"), "yyyy-MM-dd'T'HH:mm:ss.SSSXXX"),
            F.to_timestamp(F.col("DecisionDate"), "yyyy-MM-dd'T'HH:mm:ss.SSSX")), "dd/MM/yyyy")
    ).select(col("CaseNo"),col("decision_ts"),col("m3.CaseStatus").alias("CaseStatus_end"),col("m3.Outcome").alias("Outcome_end"),col("StatusId").alias("StatusId_end"),col("es.endAppealOutcome").alias("endAppealOutcome_end"),col("es.endAppealOutcomeReason").alias("endAppealOutcomeReason_end"),col("es.stateBeforeEndAppeal").alias("stateBeforeEndAppeal_end"),col("Adj_Determination_Title").alias("Adj_Determination_Title_end"),col("Adj_Determination_Forenames").alias("Adj_Determination_Forenames_end"),col("Adj_Determination_Surname").alias("Adj_Determination_Surname_end"))

    # ---- Attach every validation frame to the payload ----
    df_final = df_final.join(valid_representation, on="CaseNo", how="left"
                            ).join(valid_country_list, on=col("CaseRep_Address5") == col("valid_countryGovUkOocAdminJ"), how="left"
                            ).join(valid_catagoryid_list, on="CaseNo", how="left"
                            ).join(valid_appealant_address, on="CaseNo", how="left"
                            ).join(valid_HORef_cleansing, on="CaseNo", how="left"
                            ).join(valid_reasonDescription, on="CaseNo", how="left"
                            ).join(valid_statusId, on="CaseNo", how="left"
                            ).join(valid_hearing_requirements, on="CaseNo", how="left"
                            ).join(valid_languages, on="CaseNo", how="left"
                            ).join(valid_preparforhearing, on="CaseNo", how="left"
                            ).join(silver_m3_outcome_ranked, on="CaseNo", how="left"
                            ).join(silver_m3_ftpa_ranked, on="CaseNo", how="left"
                            ).join(silver_with_decision_ts, on="CaseNo", how="left")
                            
    # Evaluate the combined DQ rule expression into a single flag column.
    df_final = df_final.withColumn("is_valid", expr(dq_rules))

    return df_final

In [0]:
@dlt.table(
    name=f"stg_valid_{output_name}_records",
    comment="Delta Live Gold Table with JSON content.",
    path=f"{audit_path}/stg_valid_{output_name}_records"
)
def stg_valid_ended_records():
    """
    Stage the validated records together with the result of uploading their JSON.

    Reads ``stg_main_{output_name}_validation``, keeps the rows whose ``is_valid``
    flag is true, spreads them over 64 partitions, discards rows whose
    ``JSON_content`` starts with ``Error`` and stores the result of
    ``upload_udf(JSON_File_Name, JSON_content)`` in a ``Status`` column.

    Returns a DataFrame with ``CaseNo``, ``JSON_content``, ``File_Name``
    (renamed from ``JSON_File_Name``) and ``Status``.
    """
    validated = dlt.read(f"stg_main_{output_name}_validation")

    # Only rows flagged valid are kept; the repartition happens before the UDF is applied.
    valid_rows = validated.filter(col("is_valid") == True).repartition(64)

    # Rows whose JSON content starts with "Error" are left out of the upload.
    uploadable_rows = valid_rows.filter(~col("JSON_content").like("Error%"))
    upload_result = upload_udf(col("JSON_File_Name"), col("JSON_content"))
    uploaded_rows = uploadable_rows.withColumn("Status", upload_result)

    return uploaded_rows.select(
        "CaseNo",
        "JSON_content",
        col("JSON_File_Name").alias("File_Name"),
        "Status",
    )


In [0]:
@dlt.table(
    name=f"stg_invalid_{output_name}_quarantine_records",
    comment="Quarantined records that failed data quality checks or JSON generation.",
    path=f"{audit_path}/stg_invalid_{output_name}_quarantine_records"
)
def stg_invalid_ended_quarantine_records():
    """
    Stage the records that are not flagged valid, with their upload status.

    Reads ``stg_main_{output_name}_validation``, keeps every row whose
    ``is_valid`` flag is not true (false or NULL), rewrites ``/JSON/`` to
    ``/INVALID_JSON/`` in ``JSON_File_Name``, spreads the rows over 64
    partitions, discards rows whose ``JSON_content`` starts with ``Error`` and
    stores the result of ``upload_udf(JSON_File_Name, JSON_content)`` in ``Status``.

    Returns a DataFrame with ``CaseNo``, ``JSON_content``, ``File_Name``
    (renamed from ``JSON_File_Name``) and ``Status``.
    """
    df = dlt.read(f"stg_main_{output_name}_validation")

    # Null-safe comparison: a plain `!= True` evaluates to NULL when is_valid is NULL,
    # so such rows would be filtered out here as well as by an `is_valid == True`
    # filter, and would end up in neither the valid nor the quarantine output.
    df_filtered = df.filter(
        ~col("is_valid").eqNullSafe(True)
    ).withColumn("JSON_File_Name", regexp_replace(col("JSON_File_Name"), "/JSON/", "/INVALID_JSON/"))

    # Repartition to optimize parallelism
    repartitioned_df = df_filtered.repartition(64)

    # Rows whose JSON content starts with "Error" are left out of the upload.
    df_with_upload_status = repartitioned_df.filter(~col("JSON_content").like("Error%")).withColumn(
        "Status", upload_udf(col("JSON_File_Name"), col("JSON_content"))
    )

    return df_with_upload_status.select("CaseNo", "JSON_content", col("JSON_File_Name").alias("File_Name"), "Status")


In [0]:
import dlt
from pyspark.sql.functions import col, lit, expr

@dlt.table(
    name=f"apl_active_{output_name}_cr_audit_table",
    comment="DLT table Covers 4.2 Silver layer LLD requirements: Audits CCD attributes, input field values, derived values, and all columns for validation and traceability.",
    path=f"{audit_path}/apl_active_{output_name}_cr_audit_table"
)
def apl_active_ended_cr_audit_table():
    """
    Build the audit table for the current appeal state.

    The silver inputs are filtered to ``AppealState`` and loaded, together with
    the bronze lookups, through ``dlt.read`` / ``spark.table``. If any of those
    reads raises, every input is reloaded from the
    ``hive_metastore.ariadm_active_appeals`` tables instead. The inputs are
    passed to ``mainEnded`` and the second DataFrame it returns (the audit
    frame) is returned.
    """
    try:
        silver_m1 = dlt.read("silver_appealcase_detail").filter(col("dv_targetState") == lit(AppealState)).distinct()
        silver_m2 = dlt.read("silver_caseapplicant_detail").filter(col("dv_targetState") == lit(AppealState))
        # The two *_lookup_df frames below are not passed to mainEnded; they are
        # kept so the set of tables read by this function is unchanged.
        bronze_appealtype_lookup_df = dlt.read("bronze_appealtype").distinct()
        bronze_hearing_centres_lookup_df = dlt.read("bronze_hearing_centres").distinct()
        silver_m3 = dlt.read("silver_status_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_m6 = dlt.read("silver_adjudicator_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_c = dlt.read("ariadm_active_appeals.silver_appealcategory_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_h = dlt.read("silver_history_detail").filter(col("dv_targetState") == lit(AppealState))
        bronze_remissions_lookup_df = dlt.read("bronze_remissions").distinct()
        bronze_countryFromAddress = dlt.read("bronze_countries_countryFromAddress")
        bronze_HORef_cleansing = dlt.read("bronze_HORef_cleansing")
        bronze_hearing_centres = dlt.read("bronze_hearing_centres")
        bronze_derive_hearing_centres = dlt.read("bronze_derive_hearing_centres")
        bronze_interpreter_languages = dlt.read("bronze_interpreter_languages")
        bronze_listing_location = spark.table("bronze_listing_location")
        bronze_ended_states = spark.table("bronze_ended_states")
        silver_segmentation = dlt.read("stg_segmentation_states").filter(col("TargetState") == lit(AppealState))

    except Exception as read_error:
        # Narrowed from a bare `except:` so KeyboardInterrupt / SystemExit are no longer
        # swallowed, and the reason for taking the fallback path is visible in the logs.
        print(f"Falling back to hive_metastore.ariadm_active_appeals tables: {read_error!r}")
        silver_m1 = spark.table("hive_metastore.ariadm_active_appeals.silver_appealcase_detail").filter(col("dv_targetState") == lit(AppealState)).distinct()
        silver_m2 = spark.table("hive_metastore.ariadm_active_appeals.silver_caseapplicant_detail").filter(col("dv_targetState") == lit(AppealState))
        bronze_appealtype_lookup_df = spark.table("hive_metastore.ariadm_active_appeals.bronze_appealtype").distinct()
        bronze_hearing_centres_lookup_df = spark.table("hive_metastore.ariadm_active_appeals.bronze_hearing_centres").distinct()
        silver_m3 = spark.table("hive_metastore.ariadm_active_appeals.silver_status_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_m6 = spark.table("hive_metastore.ariadm_active_appeals.silver_adjudicator_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_c = spark.table("hive_metastore.ariadm_active_appeals.silver_appealcategory_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_h = spark.table("hive_metastore.ariadm_active_appeals.silver_history_detail").filter(col("dv_targetState") == lit(AppealState))
        bronze_remissions_lookup_df = spark.table("hive_metastore.ariadm_active_appeals.bronze_remissions").distinct()
        bronze_countryFromAddress = spark.table("hive_metastore.ariadm_active_appeals.bronze_countries_countryFromAddress")
        bronze_HORef_cleansing = spark.table("hive_metastore.ariadm_active_appeals.bronze_HORef_cleansing")
        bronze_hearing_centres = spark.table("hive_metastore.ariadm_active_appeals.bronze_hearing_centres")
        bronze_derive_hearing_centres = spark.table("hive_metastore.ariadm_active_appeals.bronze_derive_hearing_centres")
        bronze_interpreter_languages = spark.table("hive_metastore.ariadm_active_appeals.bronze_interpreter_languages")
        bronze_listing_location = spark.table("hive_metastore.ariadm_active_appeals.bronze_listing_location")
        bronze_ended_states = spark.table("hive_metastore.ariadm_active_appeals.bronze_ended_states")
        silver_segmentation = spark.table("hive_metastore.ariadm_active_appeals.stg_segmentation_states").filter(col("TargetState") == lit(AppealState))

    # mainEnded returns (final frame, audit frame); only the audit frame is published here.
    _, df_audit = mainEnded(silver_segmentation, silver_m1, silver_m2, silver_m3, silver_m6, silver_c,silver_h, bronze_remissions_lookup_df, bronze_countryFromAddress, bronze_HORef_cleansing, bronze_hearing_centres, bronze_derive_hearing_centres, bronze_interpreter_languages,bronze_listing_location,bronze_ended_states)

    return df_audit

In [0]:
# End the notebook run, handing this completion message back as the notebook's exit value.
dbutils.notebook.exit("Notebook completed successfully")