In [0]:
# %pip install pycountry
# %pip install uk-postcodes-parsing

In [0]:
## Build the Python wheel (upgrades the `build` package first, then builds a wheel for the current directory)
!python -m pip install -U build
!python -m build --wheel
## Install the most recently modified wheel found in dist/ (`ls -t | head -1`) and restart the Python kernel
%pip install $(ls -t dist/*.whl | head -1)
dbutils.library.restartPython()

In [0]:
import json
from datetime import datetime
from pyspark.sql.functions import *
import uk_postcodes_parsing
from pyspark.sql import functions as F
import os

In [0]:
# Read the environment configuration. `first()` triggers a Spark job, so the row
# is fetched once and reused for every field instead of once per field.
config = spark.read.option("multiline", "true").json("dbfs:/configs/config.json")
config_row = config.first()
if config_row is None:
    # Fail with a readable message rather than "'NoneType' object is not subscriptable".
    raise ValueError("dbfs:/configs/config.json did not yield a configuration record")

env_name = config_row["env"].strip().lower()
lz_key = config_row["lz_key"].strip().lower()

print(f"env_code: {lz_key}")  # This won't be redacted
print(f"env_name: {env_name}")  # This won't be redacted

# Key Vault-backed secret scope name, derived from landing-zone key and environment.
KeyVault_name = f"ingest{lz_key}-meta002-{env_name}"
print(f"KeyVault_name: {KeyVault_name}")

In [0]:
# Service principal credentials, read from the secret scope named by KeyVault_name
client_id = dbutils.secrets.get(KeyVault_name, "SERVICE-PRINCIPLE-CLIENT-ID")
client_secret = dbutils.secrets.get(KeyVault_name, "SERVICE-PRINCIPLE-CLIENT-SECRET")
tenant_id = dbutils.secrets.get(KeyVault_name, "SERVICE-PRINCIPLE-TENANT-ID")

# Storage account names
curated_storage = f"ingest{lz_key}curated{env_name}"
checkpoint_storage = f"ingest{lz_key}xcutting{env_name}"
raw_storage = f"ingest{lz_key}raw{env_name}"
landing_storage = f"ingest{lz_key}landing{env_name}"
external_storage = f"ingest{lz_key}external{env_name}"

# Every storage account uses the same service principal, so the five OAuth
# settings are applied in one loop instead of five hand-copied stanzas.
oauth_token_endpoint = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"

for storage_account in (
    curated_storage,      # Delta tables
    checkpoint_storage,   # checkpoints (cross-cutting account)
    raw_storage,
    landing_storage,
    external_storage,
):
    account_host = f"{storage_account}.dfs.core.windows.net"
    spark.conf.set(f"fs.azure.account.auth.type.{account_host}", "OAuth")
    spark.conf.set(f"fs.azure.account.oauth.provider.type.{account_host}", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider")
    spark.conf.set(f"fs.azure.account.oauth2.client.id.{account_host}", client_id)
    spark.conf.set(f"fs.azure.account.oauth2.client.secret.{account_host}", client_secret)
    spark.conf.set(f"fs.azure.account.oauth2.client.endpoint.{account_host}", oauth_token_endpoint)

In [0]:
# Target state processed by this notebook and the name used for its output.
AppealState = "caseUnderReview"
output_name = "caseUnderReview"

# AppealState = "paymentPending"

# Locations used by subsequent cells; all of them live under the curated account.
curated_host = f"ingest{lz_key}curated{env_name}.dfs.core.windows.net"
appeals_root = "ARIADM/ACTIVE/CCD/APPEALS"

bronze_path = f"abfss://bronze@{curated_host}/{appeals_root}/"
silver_path = f"abfss://silver@{curated_host}/{appeals_root}/"
audit_path = f"abfss://silver@{curated_host}/{appeals_root}/AUDIT/{AppealState}"
gold_outputs = f"{appeals_root}/{AppealState}"

# Echo the resolved settings so the run is self-describing.
variables = dict(
    bronze_path=bronze_path,
    silver_path=silver_path,
    audit_path=audit_path,
    gold_outputs=gold_outputs,
    key_vault=KeyVault_name,
    AppealState=AppealState,
)

display(variables)

In [0]:
import Active_Functions.paymentPending as PP
import Active_Functions.AwaitingEvidenceRespondant_a as AERa
import Active_Functions.AwaitingEvidenceRespondant_b as AERb
import Active_Functions.appealSubmitted as APS

In [0]:
from pyspark.sql.functions import col, lit

# Predicate shared by every state-scoped silver table.
in_target_state = col("dv_targetState") == lit(AppealState)

# Silver tables restricted to the target state
silver_m1 = spark.table("ariadm_active_appeals.silver_appealcase_detail").where(in_target_state).distinct()
silver_m3 = spark.table("ariadm_active_appeals.silver_status_detail").where(in_target_state)
silver_m4 = spark.table("ariadm_active_appeals.silver_transaction_detail").where(in_target_state)
silver_m5 = spark.table("ariadm_active_appeals.silver_link_detail").where(in_target_state)
silver_m6 = spark.table("ariadm_active_appeals.silver_adjudicator_detail").where(in_target_state)
silver_h = spark.table("ariadm_active_appeals.silver_history_detail").where(in_target_state)

# Silver tables read without a state filter
silver_m2 = spark.table("ariadm_active_appeals.silver_caseapplicant_detail")
silver_c = spark.table("ariadm_active_appeals.silver_appealcategory_detail")

# Bronze reference tables
bronze_remissions = spark.table("ariadm_active_appeals.bronze_remissions").distinct()
bronze_countryFromAddress = spark.table("ariadm_active_appeals.bronze_countries_countryFromAddress")
bronze_HORef_cleansing = spark.table("ariadm_active_appeals.bronze_HORef_cleansing")
bronze_hearing_centres = spark.table("ariadm_active_appeals.bronze_hearing_centres")
bronze_derive_hearing_centres = spark.table("ariadm_active_appeals.bronze_derive_hearing_centres")

In [0]:
# Smoke-run PP.appealType on the state-filtered appeal cases; the returned pair is
# unpacked into `df` / `df_audit`, which later cells overwrite.
df, df_audit = PP.appealType(silver_m1)
# display(df)

In [0]:
# Smoke-run PP.caseData with the case, applicant, status and history frames plus
# the two hearing-centre reference tables.
df,df_audit = PP.caseData(silver_m1, silver_m2, silver_m3, silver_h, bronze_hearing_centres, bronze_derive_hearing_centres)
# display(df)

In [0]:
# Smoke-run PP.flagsLabels with the case, applicant and category frames.
df, df_audit = PP.flagsLabels(silver_m1, silver_m2, silver_c)
# display(df)

In [0]:
# Smoke-run PP.legalRepDetails; it only needs the appeal-case frame.
df, df_audit = PP.legalRepDetails(silver_m1)
# display(df)

In [0]:
# Smoke-run the AwaitingEvidenceRespondant_a (AERa) variant of appellantDetails,
# passing the country and Home Office reference lookup tables.
df, df_audit = AERa.appellantDetails(silver_m1, silver_m2, silver_c, bronze_countryFromAddress,bronze_HORef_cleansing)
# display(df)

In [0]:
# Smoke-run PP.homeOfficeDetails with the case, applicant and category frames and
# the Home Office reference cleansing table.
df, df_audit = PP.homeOfficeDetails(silver_m1, silver_m2, silver_c, bronze_HORef_cleansing)
#display(df)

In [0]:
# Smoke-run the appealSubmitted (APS) variant of paymentType, which also takes the
# transaction frame.
df, df_audit = APS.paymentType(silver_m1, silver_m4)
# display(df)

In [0]:
# Smoke-run PP.partyID. Unlike the neighbouring cells, the first result is bound
# to `df_final` rather than `df`.
df_final,df_audit = PP.partyID(silver_m1, silver_m3, silver_c)
# display(df_final)

In [0]:
# Smoke-run the appealSubmitted (APS) variant of remissionTypes with the remissions
# reference table and the transaction frame.
df, df_audit = APS.remissionTypes(silver_m1, bronze_remissions,silver_m4)
# display(df)

In [0]:
# Smoke-run PP.sponsorDetails with the case and category frames.
df, df_audit = PP.sponsorDetails(silver_m1, silver_c)
# display(df)

In [0]:
# Smoke-run PP.general; it only needs the appeal-case frame.
df, df_audit = PP.general(silver_m1)
# display(df)

In [0]:
# Smoke-run AERb.generalDefault. The result is bound to a single name here, with no
# accompanying audit frame.
generalDefault_df = AERb.generalDefault(silver_m1)
# display(generalDefault_df)

In [0]:
# Smoke-run the AwaitingEvidenceRespondant_b (AERb) variant of documents.
df, df_audit = AERb.documents(silver_m1)
# display(df)

In [0]:
# Smoke-run PP.caseState with an explicit state literal.
# NOTE(review): the literal is "awaitingRespondentEvidence" while AppealState in this
# notebook is "caseUnderReview" — confirm which state this cell should pass.
df, df_audit = PP.caseState(silver_m1,"awaitingRespondentEvidence")
# display(df)

In [0]:
# Eyeball the status and adjudicator inputs before building the hearing response.
for inspected_frame in (silver_m3, silver_m6):
    inspected_frame.display()

In [0]:
from pyspark.sql.functions import max as spark_max
from pyspark.sql import functions as F 
from pyspark.sql import Window
from pyspark.sql.functions import col, lit, trim, concat_ws, when, desc

def _na_if_null(column_name):
    """Return the named column with NULL replaced by the literal "N/A"."""
    return when(col(column_name).isNull(), "N/A").otherwise(col(column_name))


def _first_tier_judge(prefix):
    """Build "<Surname> <Forenames> (<Title>)" for one first-tier judge.

    The bracketed title is only added when the title is non-empty; the whole
    expression is "" when the surname is NULL.
    """
    surname = col(f"{prefix}_Surname")
    forenames = col(f"{prefix}_Forenames")
    title = col(f"{prefix}_Title")
    bracketed_title = when(
        title.isNotNull() & (title != ""), concat(lit("("), title, lit(")"))
    ).otherwise(lit(""))
    return when(
        surname.isNotNull(), concat_ws(" ", surname, forenames, bracketed_title)
    ).otherwise(lit(""))


def hearingResponse(silver_m1,silver_m3, silver_m6):
    """Build the `additionalInstructionsTribunalResponse` text and its audit frame.

    Args:
        silver_m1: appeal-case frame; only used to add `dv_representation` and
            `dv_CCDAppealType` to the audit output.
        silver_m3: status frame; supplies the hearing, judge and court-clerk columns.
        silver_m6: adjudicator frame; supplies `Judge_*` and `Required`.

    Returns:
        (content_df, df_audit) where content_df holds `CaseNo` and
        `additionalInstructionsTribunalResponse`, and df_audit additionally carries
        the `..._inputFields` / `..._inputValues` arrays.

    Note:
        silver_m6 is left-joined on CaseNo without de-duplication, so a case with
        several silver_m6 rows yields several output rows.
    """
    # Keep, per case, the row with the highest StatusId among the qualifying
    # statuses: 37/38, or 26 with Outcome 0. Parentheses make the original
    # `a | b & c` precedence explicit.
    latest_first = Window.partitionBy("CaseNo").orderBy(desc("StatusId"))
    qualifying_status = F.col("CaseStatus").isin([37, 38]) | (
        (F.col("CaseStatus") == 26) & (F.col("Outcome") == 0)
    )
    df_stg = (
        silver_m3.filter(qualifying_status)
        .withColumn("rn", F.row_number().over(latest_first))
        .filter(F.col("rn") == 1)
        .drop(F.col("rn"))
    )

    # Court clerk display name; stays NULL when there is no usable surname.
    clerk_surname = col("CourtClerk_Surname")
    clerk_title = col("CourtClerk_Title")
    clerk_bracketed_title = when(
        clerk_title.isNotNull() & (clerk_title != ""),
        concat(lit("("), clerk_title, lit(")")),
    ).otherwise(lit(None))
    m3_df = df_stg.withColumn(
        "CourtClerkFull",
        when(
            clerk_surname.isNotNull() & (clerk_surname != ""),
            concat_ws(" ", clerk_surname, col("CourtClerk_Forenames"), clerk_bracketed_title),
        ),
    )

    # Translate the Required flag into display text (NULL for any other value).
    stg_m6 = silver_m6.withColumn(
        "Transformed_Required",
        when(F.col("Required") == '0', lit('Not Required')).when(F.col("Required") == '1', lit('Required')),
    )

    judge_first_tier = when(
        coalesce(col("Judge1FT_Surname"), col("Judge2FT_Surname"), col("Judge3FT_Surname")).isNotNull(),
        trim(concat_ws(
            " ",
            _first_tier_judge("Judge1FT"),
            _first_tier_judge("Judge2FT"),
            _first_tier_judge("Judge3FT"),
        )),
    ).otherwise(lit(None))

    # NOTE(review): brackets and colon are separate concat_ws items, so the text
    # reads "( Title ) : Required" with spaces — kept as-is; confirm the format.
    judicial_officers = concat_ws(
        " ",
        col("Judge_Surname"),
        col("Judge_Forenames"),
        when(col("Judge_Title").isNotNull(), "("),
        col("Judge_Title"),
        when(col("Judge_Title").isNotNull(), ")"),
        when(col("Transformed_Required").isNotNull(), ":"),
        col("Transformed_Required"),
    )

    # (label, value expression) pairs, in the order they appear in the summary text.
    summary_lines = [
        ("Hearing Centre", coalesce(col("Hearing Centre"), lit("N/A"))),
        ("Hearing Date", coalesce(col("Hearing Date"), lit("N/A"))),
        ("Hearing Type", coalesce(col("Hearing Type"), lit("N/A"))),
        ("Court", coalesce(col("Court"), lit("N/A"))),
        # Uses the derived "List Type" column like every other line.
        ("List Type", coalesce(col("List Type"), lit("N/A"))),
        ("List Start Time", coalesce(col("List Start Time"), lit("N/A"))),
        ("Judge First Tier", coalesce(col("Judge First Tier"), lit(''))),
        ("Court Clerk / Usher", coalesce(nullif(concat_ws(", ", col("CourtClerkFull")), lit("")), lit("N/A"))),
        ("Start Time", coalesce(col("Start Time"), lit("N/A"))),
        ("Estimated Duration", coalesce(col("Estimated Duration"), lit("N/A"))),
        # concat_ws yields "" rather than NULL when every input is NULL, so the
        # empty string must be mapped to NULL for the "N/A" fallback to apply.
        ("Required/Incompatible Judicial Officers",
         coalesce(nullif(col("Required/Incompatible Judicial Officers"), lit("")), lit("N/A"))),
        ("Notes", coalesce(col("Notes"), lit("N/A"))),
    ]
    summary_parts = [lit("Listed details from ARIA: ")]
    for label, value in summary_lines:
        summary_parts.extend([lit(f"\n {label}: "), value])

    final_df = (
        m3_df.join(stg_m6, ["CaseNo"], "left")
        .withColumn("CaseNo", trim(col("CaseNo")))
        .withColumn("Hearing Centre", _na_if_null("HearingCentre"))  # ListedCentre
        .withColumn("Hearing Date", _na_if_null("HearingDate"))  # KeyDate
        .withColumn("Hearing Type", _na_if_null("HearingType"))
        .withColumn("Court", _na_if_null("CourtName"))
        .withColumn("List Type", _na_if_null("ListType"))
        .withColumn("List Start Time", _na_if_null("StartTime"))
        .withColumn("Judge First Tier", judge_first_tier)
        .withColumn("Start Time", _na_if_null("StartTime"))
        .withColumn("Estimated Duration", _na_if_null("TimeEstimate"))
        .withColumn("Required/Incompatible Judicial Officers", judicial_officers)
        .withColumn("Notes", _na_if_null("Notes"))
        .withColumn("additionalInstructionsTribunalResponse", concat(*summary_parts))
    )

    content_df = final_df.select(
        col("CaseNo"),
        col("additionalInstructionsTribunalResponse"))

    # Single description of the audit payload:
    # (audit field label, source column names, column holding the audited value).
    audit_fields = [
        ("Hearing Centre", ["HearingCentre"], "f.`Hearing Centre`"),
        ("Hearing Date", ["HearingDate"], "f.`Hearing Date`"),
        ("Hearing Type", ["HearingType"], "f.`Hearing Type`"),
        ("Court", ["CourtName"], "f.`Court`"),
        ("List Type", ["ListType"], "f.`List Type`"),
        ("List Start Time", ["StartTime"], "f.`List Start Time`"),
        ("Judge First Tier",
         ["Judge1FT_Surname", "Judge1FT_Forenames", "Judge1FT_Title",
          "Judge2FT_Surname", "Judge2FT_Forenames", "Judge2FT_Title",
          "Judge3FT_Surname", "Judge3FT_Forenames", "Judge3FT_Title"],
         "f.`Judge First Tier`"),
        ("Start Time", ["StartTime"], "f.`Start Time`"),
        ("Estimated Duration", ["TimeEstimate"], "f.`Estimated Duration`"),
        ("Required/Incompatible Judicial Officers",
         ["Judge_Surname", "Judge_Forenames", "Judge_Title", "Transformed_Required"],
         "f.`Required/Incompatible Judicial Officers`"),
        ("Notes", ["Notes"], "f.`Notes`"),
        ("Court Clerk / Usher",
         ["CourtClerk_Surname", "CourtClerk_Forenames", "CourtClerk_Title"],
         "f.CourtClerkFull"),
        ("dv_representation", ["dv_representation"], "m1.dv_representation"),
        ("dv_CCDAppealType", ["dv_CCDAppealType"], "m1.dv_CCDAppealType"),
    ]
    input_fields = array(*[
        struct(
            lit(label).alias("field"),
            array(*[lit(source) for source in sources]).alias("source_columns"),
        )
        for label, sources, _ in audit_fields
    ])
    input_values = array(*[
        struct(lit(label).alias("field"), col(value_column).cast("string").alias("value"))
        for label, _, value_column in audit_fields
    ])

    df_audit = final_df.alias("f").join(silver_m1.alias("m1"), col("m1.CaseNo") == col("f.CaseNo"), "left").select(
        col("f.CaseNo"),
        col("f.additionalInstructionsTribunalResponse"),
        input_fields.alias("additionalInstructionsTribunalResponse_inputFields"),
        input_values.alias("additionalInstructionsTribunalResponse_inputValues"),
    )
    return content_df, df_audit

df,a_df = hearingResponse(silver_m1,silver_m3,silver_m6)

In [0]:
# Render the hearing-response content frame.
display(df)

In [0]:
# NOTE(review): in the cells above `final_df` is only assigned inside hearingResponse(),
# so on a fresh kernel this presumably raises NameError — confirm which frame should be shown.
final_df.display()

In [0]:
from pyspark.sql.functions import array, struct

# This cell used to rebuild the audit frame from `final_df`, but that name is local
# to hearingResponse() and is not defined at notebook scope, so the cell failed on a
# fresh kernel. hearingResponse() already returns this exact audit frame (captured
# as `a_df` where the function is called), so reuse it instead of duplicating the
# projection.
df_audit = a_df

display(df_audit)

In [0]:
### test

def hearingResponse(silver_m1, silver_m3, silver_m6):
    """Experimental variant of the hearing-response builder.

    Joins silver_m1 with silver_m3, silver_m6, a per-case court-clerk aggregate and
    the per-case maximum StatusId, derives the display columns, and returns
    (df, df_audit): `df` holds CaseNo and `additionalInstructionsTribunalResponse`,
    `df_audit` adds the input field/value arrays.

    NOTE(review): this redefines `hearingResponse`, replacing the definition from the
    earlier cell for everything executed afterwards.
    NOTE(review): `max_StatusId` is only copied into `valid_statusId`; no row is ever
    compared against it, so rows are not restricted to the latest status — confirm.
    """

    # Row-level filters applied at the end of the content pipeline.
    conditions = (col("dv_representation") == 'AIP') & (col("dv_CCDAppealType").isNotNull())
    row_conditions = silver_m3.groupBy("CaseNo").agg(spark_max("StatusId").alias("max_StatusId"))
    m3_conditions = (col("CaseStatus") == 26) #IF status = 26 = include else OMIT
    m6_conditions = when(col("Required") == '0', lit('Not Required')).when(col("Required") == '1', lit('Required'))

    # Per-case list of court clerk display names (NULL when the list is empty).
    court_clerk_agg = silver_m3.withColumn("CourtClerkFull", 
                                           when((col("CourtClerk_Surname").isNotNull()) & (col("CourtClerk_Surname") != ""), concat_ws(" ", col("CourtClerk_Surname"), col("CourtClerk_Forenames"),
        when((col("CourtClerk_Title").isNotNull()) & (col("CourtClerk_Title") != ""),
            concat(lit("("), col("CourtClerk_Title"), lit(")"))).otherwise(lit(None))))
            ).groupby("CaseNo"
            ).agg(when(size(collect_list("CourtClerkFull")) > 0, collect_list("CourtClerkFull")).otherwise(lit(None)).alias("Court Clerk / Usher"))
    
    # NOTE(review): the final .where() references columns (valid_statusId, CaseStatus,
    # dv_*) that the preceding .select() does not keep — presumably resolved by Spark
    # from the underlying plan; confirm this behaves as intended.
    # Because m3_conditions requires CaseStatus == 26, rows that got a valid_statusId
    # through the 37/38 branch are filtered out again.
    df = silver_m1.join(silver_m3, ["CaseNo"], "left"
                    ).join(silver_m6, ["CaseNo"], "left"
                    ).join(court_clerk_agg, ["CaseNo"], "left"
                    ).join(row_conditions, ["CaseNo"], "left"
                ).withColumn("CaseNo", col("CaseNo")
                ).withColumn("Hearing Centre", 
                            when(col("HearingCentre").isNull(), "N/A").otherwise(col("HearingCentre")) #ListedCentre
                ).withColumn("Hearing Date",
                            when(col("HearingDate").isNull(), "N/A").otherwise(col("HearingDate")) #KeyDate
                ).withColumn("Hearing Type",
                            when(col("HearingType").isNull(), "N/A").otherwise(col("HearingType"))
                ).withColumn("Court", 
                            when(col("CourtName").isNull(), "N/A").otherwise(col("CourtName"))
                ).withColumn("List Type",
                            when(col("ListType").isNull(), "N/A").otherwise(col("ListType"))
                ).withColumn("List Start Time",
                            when(col("StartTime").isNull(), "N/A").otherwise(col("StartTime"))
                ).withColumn("Judge First Tier", 
                            when(coalesce(col("Judge1FT_Surname"), col("Judge2FT_Surname"), col("Judge3FT_Surname")).isNotNull(),
                            trim(concat_ws(" ",
                            when(col("Judge1FT_Surname").isNotNull(),
                                concat_ws(" ", col("Judge1FT_Surname"), col("Judge1FT_Forenames"),
                                when(col("Judge1FT_Title").isNotNull() & (col("Judge1FT_Title") != ""),
                                    concat(lit("("), col("Judge1FT_Title"), lit(")"))).otherwise(lit("")))).otherwise(lit("")),

                            when(col("Judge2FT_Surname").isNotNull(),
                                concat_ws(" ", col("Judge2FT_Surname"), col("Judge2FT_Forenames"),
                                    when(col("Judge2FT_Title").isNotNull() & (col("Judge2FT_Title") != ""),
                                        concat(lit("("), col("Judge2FT_Title"), lit(")"))).otherwise(lit("")))).otherwise(lit("")),
            
                            when(col("Judge3FT_Surname").isNotNull(),
                                 concat_ws(" ", col("Judge3FT_Surname"), col("Judge3FT_Forenames"),
                                    when(col("Judge3FT_Title").isNotNull() & (col("Judge3FT_Title") != ""),
                                        concat(lit("("), col("Judge3FT_Title"), lit(")"))).otherwise(lit("")))
                                 ).otherwise(lit(""))))
                            ).otherwise(lit(None))
                            
                ).withColumn("Start Time",
                            when(col("StartTime").isNull(), "N/A").otherwise(col("StartTime"))
                ).withColumn("Estimated Duration",
                            when(col("TimeEstimate").isNull(), "N/A").otherwise(col("TimeEstimate"))
                ).withColumn("TransformationRequired", m6_conditions
                ).withColumn("Required/Incompatible Judicial Officers", concat_ws(" ", col("Judge_Surname"), col                ("Judge_Forenames"), col("Judge_Title"), "TransformationRequired")
                ).withColumn("Notes",
                            when(col("Notes").isNull(), "N/A").otherwise(col("Notes"))
                ).withColumn("valid_statusId",
                            when(
                                col("CaseStatus").isin("37", "38") | ((col("CaseStatus") == 26) & (col("Outcome") == 0)),
                                col("max_StatusId")
                            ).otherwise(lit(None))
                ).withColumn("additionalInstructionsTribunalResponse",
                            concat(
                            lit("Listed details from ARIA: "),
                            lit("\n Hearing Centre: "), coalesce(col("Hearing Centre"), lit("N/A")),
                            lit("\n Hearing Date: "), coalesce(col("Hearing Date"), lit("N/A")),
                            lit("\n Hearing Type: "), coalesce(col("Hearing Type"), lit("N/A")),
                            lit("\n Court: "), coalesce(col("Court"), lit("N/A")),
                            lit("\n List Type: "), coalesce(col("ListType"), lit("N/A")),
                            lit("\n List Start Time: "), coalesce(col("List Start Time"), lit("N/A")),
                            lit("\n Judge First Tier: "), coalesce(col("Judge First Tier"), lit('')),
                            lit("\n Court Clerk / Usher: "), coalesce(nullif(concat_ws(", ", col("Court Clerk / Usher")), lit("")), lit("N/A")),
                            lit("\n Start Time: "), coalesce(col("Start Time"), lit("N/A")),
                            lit("\n Estimated Duration: "), coalesce(col("Estimated Duration"), lit("N/A")),
                            lit("\n Required/Incompatible Judicial Officers: "), coalesce(col("Required/Incompatible Judicial Officers"), lit("N/A")),
                            lit("\n Notes: "), coalesce(col("Notes"), lit("N/A"))
                        )
                             
                ).select(
                    col("CaseNo"),
                    "additionalInstructionsTribunalResponse"
                ).where(col("valid_statusId").isNotNull() & conditions & m3_conditions).distinct()
            
    # NOTE(review): these two lists are never used below.
    common_inputFields = [lit("dv_representation"), lit("dv_CCDAppealType")]
    common_inputValues = [col("m1_audit.dv_representation"), col("m1_audit.dv_CCDAppealType")]

    # Audit frame: re-joins the source frames to the content rows and packs the source
    # column names and their values into single-element arrays of (unnamed-field) structs.
    df_audit = (
        df.alias("content")
        .join(silver_m1.alias("m1_audit"), ["CaseNo"], "left")
        .join(silver_m3.alias("m3_audit"), ["CaseNo"], "left")
        .join(silver_m6.alias("m6_audit"), ["CaseNo"], "left")
        .join(court_clerk_agg, ["CaseNo"], "left")
        .join(row_conditions, ["CaseNo"], "left")
        .select(
            col("CaseNo"),

            array(
                struct(
                    lit("HearingCentre"),
                    lit("HearingDate"),
                    lit("HearingType"),
                    lit("CourtName"),
                    lit("ListType"),
                    lit("StartTime"),
                    lit("Judge1FT_Surname"),
                    lit("Judge2FT_Surname"),
                    lit("Judge3FT_Surname"),
                    lit("Court Clerk / Usher"),
                    lit("TimeEstimate"),
                    lit("Judge_Surname"),
                    lit("Judge_Forenames"),
                    lit("Judge_Title"),
                    lit("Notes")
                )
            ).alias("additionalInstructionsTribunalResponse_inputFields"),

            array(
                struct(
                    col("m3_audit.HearingCentre").cast("string"),
                    col("m3_audit.HearingDate").cast("string"),
                    col("m3_audit.HearingType").cast("string"),
                    col("m3_audit.CourtName").cast("string"),
                    col("m3_audit.ListType").cast("string"),
                    col("m3_audit.StartTime").cast("string"),
                    col("m3_audit.Judge1FT_Surname").cast("string"),
                    col("m3_audit.Judge2FT_Surname").cast("string"),
                    col("m3_audit.Judge3FT_Surname").cast("string"),
                    col("Court Clerk / Usher").cast("string"),
                    col("m3_audit.TimeEstimate").cast("string"),
                    col("m6_audit.Judge_Surname").cast("string"),
                    col("m6_audit.Judge_Forenames").cast("string"),
                    col("m6_audit.Judge_Title").cast("string"),
                    col("m3_audit.Notes").cast("string")
                )
            ).alias("additionalInstructionsTribunalResponse_inputValues"),
            col("content.additionalInstructionsTribunalResponse"),
            lit("yes").alias("additionalInstructionsTribunalResponse_Transformed")
        )
    ).distinct()
    
    return df, df_audit


# Re-run with the experimental definition; this overwrites `df` and `a_df`.
df, a_df = hearingResponse(silver_m1, silver_m3, silver_m6)

df.display()

In [0]:
# Status rows that carry hearing details, trimmed to the columns the hearing
# response needs.
# NOTE(review): the other cells in this notebook filter on CaseStatus 37/38; this one
# uses 37/36 — confirm which pair is intended.
m3 = silver_m3.filter(
    (col("CaseStatus").isin(37, 36)) | ((col("CaseStatus") == 26) & (col("Outcome") == 0))
).select(
    "CaseNo",
    "lu_hearingCentre",# "ListedCentre",
    "HearingDate", #"KeyDate",
    "HearingType",
    "CourtName",
    "ListType",
    "StartTime",
    "Judge1FT_Surname",
    "Judge1FT_Forenames",
    "Judge1FT_Title",
    "Judge2FT_Surname",
    "Judge2FT_Forenames",
    "Judge2FT_Title",
    "Judge3FT_Surname",
    "Judge3FT_Forenames",
    "Judge3FT_Title",
    "CourtClerk_Surname",
    "CourtClerk_Forenames",
    "CourtClerk_Title",
    "TimeEstimate",
    "Notes"
)

# Adjudicator columns used for the required/incompatible judicial officers line.
m6 = silver_m6.select(
    "CaseNo",
    "Judge_Forenames",
    "Judge_Surname",
    "Judge_Title",
    "Required"
)

# The previous statement ended in a half-written select (`array(col())`), which raises
# TypeError because col() needs a column name, and it assigned to `hearingResponse`,
# which would have replaced the function of that name with a DataFrame. Keep the
# joined inputs under a distinct name until the projection is written.
hearing_response_inputs = m3.join(m6, on=["CaseNo"], how="left")
# display(m3)
# display(m6)

In [0]:
from pyspark.sql.types import StringType, StructType, ArrayType, MapType
from pyspark.sql.functions import col, lit, to_json, struct, concat, regexp_replace
from datetime import datetime

def mainPaymentPending(silver_m1, silver_m2, silver_m3, silver_c,silver_h, bronze_remissions, bronze_countryFromAddress, bronze_HORef_cleansing,bronze_hearing_centres,bronze_derive_hearing_centres, silver_m4=None):
    """Assemble the per-case JSON payload and the matching audit frame.

    Every PP / AERa / AERb helper returns a DataFrame carrying ``CaseNo`` (most
    also return an audit DataFrame). The payload frames are left-joined onto the
    appeal-type frame, serialised into ``JSON_Content`` and given a
    ``JSON_File_name`` built from the notebook-level ``gold_outputs`` path, the
    current timestamp and the case number.

    Args:
        silver_m1, silver_m2, silver_m3, silver_c, silver_h: silver DataFrames
            passed straight through to the helper functions.
        bronze_remissions, bronze_countryFromAddress, bronze_HORef_cleansing,
        bronze_hearing_centres, bronze_derive_hearing_centres: lookup DataFrames
            passed straight through to the helper functions.
        silver_m4: DataFrame handed to ``PP.remissionTypes``. Optional for
            backward compatibility: when omitted, the notebook-level
            ``silver_m4`` is used, which is what this function silently relied
            on before the parameter existed.

    Returns:
        tuple: ``(df_final, df_combined_audit)`` where ``df_final`` is the joined
        payload frame plus ``JSON_Content`` / ``JSON_File_name`` and
        ``df_combined_audit`` is the left-join of all audit frames on ``CaseNo``.

    Raises:
        NameError: if ``silver_m4`` is neither passed in nor defined at notebook level.
    """
    # Resolve silver_m4 explicitly instead of depending on hidden notebook state.
    if silver_m4 is None:
        try:
            silver_m4 = globals()["silver_m4"]
        except KeyError:
            raise NameError(
                "name 'silver_m4' is not defined: pass silver_m4=... to mainPaymentPending"
            ) from None

    # Aggregate details
    AppealType_df, AppealType_df_audit = PP.appealType(silver_m1)
    caseData_df, caseData_df_audit = PP.caseData(silver_m1, silver_m2, silver_m3, silver_h, bronze_hearing_centres, bronze_derive_hearing_centres)
    flagsLabels_df, flagsLabels_df_audit = PP.flagsLabels(silver_m1, silver_m2, silver_c)
    appellantDetails_df, appellantDetails_df_audit = AERa.appellantDetails(silver_m1, silver_m2, silver_c, bronze_countryFromAddress,bronze_HORef_cleansing)
    legalRepDetails_df, legalRepDetails_df_audit = PP.legalRepDetails(silver_m1)
    partyID_df, partyID_df_audit = PP.partyID(silver_m1, silver_m3, silver_c)
    payment_df, payment_df_audit = PP.paymentType(silver_m1)
    homeOfficeDetails_df, homeOfficeDetails_df_audit = PP.homeOfficeDetails(silver_m1, silver_m2, silver_c, bronze_HORef_cleansing)
    remissionTypes_df, remissionTypes_df_audit = PP.remissionTypes(silver_m1, bronze_remissions, silver_m4)
    sponsorDetails_df, sponsorDetails_df_audit = PP.sponsorDetails(silver_m1, silver_c)
    general_df, general_df_audit = PP.general(silver_m1)
    # generalDefault returns a single frame, so it has no counterpart in the audit join.
    generalDefault_df = AERb.generalDefault(silver_m1)
    documents_df, documents_df_audit = PP.documents(silver_m1)
    caseState_df, caseState_df_audit = PP.caseState(silver_m1,"awaitingRespondentEvidence")

    # Left-join every payload frame onto the appeal-type frame by CaseNo
    df_combined = (
        AppealType_df.join(caseData_df, on="CaseNo", how="left")
        .join(legalRepDetails_df, on="CaseNo", how="left")
        .join(appellantDetails_df, on="CaseNo", how="left")
        .join(flagsLabels_df, on="CaseNo", how="left")
        .join(partyID_df, on="CaseNo", how="left")
        .join(homeOfficeDetails_df, on="CaseNo", how="left")
        .join(remissionTypes_df, on="CaseNo", how="left")
        .join(sponsorDetails_df, on="CaseNo", how="left")
        .join(payment_df, on="CaseNo", how="left")
        .join(general_df, on="CaseNo", how="left")
        .join(generalDefault_df, on="CaseNo", how="left")
        .join(documents_df, on="CaseNo", how="left")
        .join(caseState_df, on="CaseNo", how="left")
    )

    # Left-join every audit frame onto the appeal-type audit frame by CaseNo
    df_combined_audit = (
        AppealType_df_audit.join(caseData_df_audit, on="CaseNo", how="left")
        .join(legalRepDetails_df_audit, on="CaseNo", how="left")
        .join(appellantDetails_df_audit, on="CaseNo", how="left")
        .join(flagsLabels_df_audit, on="CaseNo", how="left")
        .join(partyID_df_audit, on="CaseNo", how="left")
        .join(homeOfficeDetails_df_audit, on="CaseNo", how="left")
        .join(remissionTypes_df_audit, on="CaseNo", how="left")
        .join(sponsorDetails_df_audit, on="CaseNo", how="left")
        .join(payment_df_audit, on="CaseNo", how="left")
        .join(general_df_audit, on="CaseNo", how="left")
        .join(documents_df_audit, on="CaseNo", how="left")
        .join(caseState_df_audit, on="CaseNo", how="left")
    )

    # One timestamp per run, so every file from this call lands in the same folder.
    Datetime_name = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

    # Build the JSON from every column except CaseNo (to_json leaves out null-valued
    # fields by default), and derive the file name; "/" in CaseNo becomes "_".
    df_final = df_combined.withColumn(
        "JSON_Content", to_json(struct(*df_combined.drop(col("CaseNo")).columns))
    ).withColumn(
        "JSON_File_name", concat(lit(f"{gold_outputs}/{Datetime_name}/JSON/APPEALS_"), regexp_replace(col("CaseNo"), "/", "_"), lit(".json"))
    )

    return df_final, df_combined_audit

########### Test ##########

# Ad-hoc run of mainPaymentPending against the ariadm_active_appeals tables.
# NOTE(review): AppealState, silver_h, bronze_hearing_centres and
# bronze_derive_hearing_centres are not assigned in this cell; they must already
# exist in the notebook session for it to run - confirm where they are defined.
def _active_appeals_table(table_name):
    """Read one table from the ariadm_active_appeals schema."""
    return spark.table(f"ariadm_active_appeals.{table_name}")


# Row filter shared by all silver inputs: keep only the target appeal state.
_in_target_state = col("dv_targetState") == lit(AppealState)

silver_m1 = _active_appeals_table("silver_appealcase_detail").filter(_in_target_state).distinct()
silver_m2 = _active_appeals_table("silver_caseapplicant_detail").filter(_in_target_state)
silver_m3 = _active_appeals_table("silver_status_detail").filter(_in_target_state)
silver_c = _active_appeals_table("silver_appealcategory_detail").filter(_in_target_state)

bronze_remissions = _active_appeals_table("bronze_remissions").distinct()

# Expose countryGovUkOocAdminJ a second time under the lu_ prefixed name.
bronze_countryFromAddress = _active_appeals_table("bronze_countries_countryFromAddress").withColumn(
    "lu_countryGovUkOocAdminJ", col("countryGovUkOocAdminJ")
)

bronze_HORef_cleansing = _active_appeals_table("bronze_HORef_cleansing")

df_final, df_audit = mainPaymentPending(
    silver_m1,
    silver_m2,
    silver_m3,
    silver_c,
    silver_h,
    bronze_remissions,
    bronze_countryFromAddress,
    bronze_HORef_cleansing,
    bronze_hearing_centres,
    bronze_derive_hearing_centres,
)

display(df_final)

### Function: Upload and Blob Client Connection Configuration

In [0]:
# Read the environment-specific curated-storage secret from the Key Vault-backed scope.
# NOTE(review): the secret is named "...-SAS-TOKEN" but is later passed to
# BlobServiceClient.from_connection_string - confirm the stored value is a full connection string.
secret = dbutils.secrets.get(KeyVault_name, f"CURATED-{env_name}-SAS-TOKEN")

In [0]:
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
import os

# Build the BlobServiceClient from the secret fetched from Key Vault above
# (nothing is hard-coded here; the value comes from dbutils.secrets).
connection_string = secret

blob_service_client = BlobServiceClient.from_connection_string(connection_string)

# All uploads in this notebook target the "gold" container
container_name = "gold"
container_client = blob_service_client.get_container_client(container_name)

In [0]:
# Upload one piece of content to Azure Blob Storage
def upload_to_blob(file_name, file_content):
    """Write ``file_content`` to the blob called ``file_name``, replacing any existing blob.

    Uses the notebook-level ``container_client``. Never raises: any exception is
    reported through the return value so the function can be used as a
    status-producing UDF.

    Returns:
        str: ``"success"``, or ``"error: <message>"`` if anything went wrong.
    """
    outcome = "success"
    try:
        target_blob = container_client.get_blob_client(f"{file_name}")
        target_blob.upload_blob(file_content, overwrite=True)
    except Exception as upload_error:
        outcome = f"error: {str(upload_error)}"
    return outcome

# Register the upload function as a UDF (no returnType is given, so udf()'s default applies).
# NOTE(review): upload_to_blob reads the notebook-level container_client; confirm that
# client can be shipped to the executors before enabling the commented-out usage below.
upload_udf = udf(upload_to_blob)

# df_with_upload_status = df_final.withColumn(
#     "Status", upload_udf(col("JSON_File_name"), col("JSON_Content"))
# )

# display(df_with_upload_status)


## Gold Outputs and Tracking DLT Table Creation

In [0]:
# Data quality checks: expectation name -> Spark SQL boolean expression.
checks = {
    # ARIADM-669 (appealType)
    "valid_appealReferenceNumber_not_null": "(appealReferenceNumber IS NOT NULL)",
    # ARIADM-671 (appealType)
    "valid_appealtype_in_allowed_values": (
        "(AppealType IN ('refusalOfHumanRights', 'refusalOfEu', 'deprivation', 'protection', 'revocationOfProtection', 'euSettlementScheme'))"
    ),
    "valid_hmctsCaseCategory_not_null": "(hmctsCaseCategory IS NOT NULL)",
    "valid_appealTypeDescription_not_null": "(appealTypeDescription IS NOT NULL)",
}

# caseManagementCategory: the selected code / label must be one of the list items.
# NULL is accepted because the field is empty where Representation = AIP.
for _attr in ("code", "label"):
    checks[f"valid_caseManagementCategory_{_attr}_in_list_items"] = f"""
(
  caseManagementCategory.value.{_attr} IS NULL OR
  ARRAY_CONTAINS(
    TRANSFORM(caseManagementCategory.list_items, x -> x.{_attr}),
    caseManagementCategory.value.{_attr}
  )
)
"""

# ##############################
# # ARIADM-673 (caseData)
#
# \d is the regex metacharacter that matches a single digit 0-9.
#   ISO 8601 datetime "yyyy-MM-ddTHH:mm:ssZ": r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$'
#   ISO 8601 date     "yyyy-MM-dd":           r'^\d{4}-\d{2}-\d{2}$'
# ##############################
# SQL raw-string literal for the date shape; kept out of the f-string so its braces stay literal.
_ISO_DATE_LITERAL = "r'^\\d{4}-\\d{2}-\\d{2}$'"

for _date_col in (
    "appealSubmissionDate",
    "appealSubmissionInternalDate",
    "tribunalReceivedDate",
):
    checks[f"valid_{_date_col}_format"] = f"({_date_col} RLIKE {_ISO_DATE_LITERAL})"

# ##############################
# # ARIADM-675 (caseData)
# ##############################
# Mandatory Yes/No flags
for _flag in ("appellantsRepresentation", "submissionOutOfTime"):
    checks[f"valid_{_flag}_yes_no"] = f"({_flag} IS NOT NULL AND {_flag} IN ('Yes', 'No'))"

# Optional Yes/No flags (NULL is accepted)
for _flag in ("recordedOutOfTimeDecision", "applicationOutOfTimeExplanation"):
    checks[f"valid_{_flag}_yes_no_or_null"] = f"({_flag} IS NULL OR {_flag} IN ('Yes', 'No'))"

# ##############################
# # ARIADM-708 (CaseData)
# ##############################
# hearingCentre must be one of the fixed centre codes
checks["valid_hearingCentre_in_allowed_values"] = """
(
    hearingCentre IN ('taylorHouse', 'newport', 'newcastle', 'manchester', 'hattonCross', 
    'glasgow', 'bradford', 'birmingham', 'arnhemHouse', 'crownHouse', 'harmondsworth', 
    'yarlsWood', 'remoteHearing', 'decisionWithoutHearing')
)
"""
checks["valid_staffLocation_not_null"] = "(staffLocation IS NOT NULL)"
# region is fixed to '1'; baseLocation must be one of the listed ids
checks["valid_caseManagementLocation_region_and_baseLocation"] = """
(
  caseManagementLocation.region = '1' AND
  caseManagementLocation.baseLocation IN (
    '231596', '698118', '366559', '386417', '512401',
    '227101', '765324', '366796', '324339', '649000',
    '999971', '420587', '28837'
  )
)
"""

# Dynamic lists: the selected code / label must be present and be one of the list items.
for _check_stem, _list_path in (
    ("hearingCentreDynamicList", "hearingCentreDynamicList"),
    ("caseManagementLocationRefData", "caseManagementLocationRefData.baseLocation"),
):
    for _attr in ("code", "label"):
        checks[f"valid_{_check_stem}_{_attr}_in_list_items"] = f"""
(
  {_list_path}.value.{_attr} IS NOT NULL AND
  ARRAY_CONTAINS(
    TRANSFORM({_list_path}.list_items, x -> x.{_attr}),
    {_list_path}.value.{_attr}
  )
)
"""

checks["valid_selectedHearingCentreRefData_not_null"] = "(selectedHearingCentreRefData IS NOT NULL)"


# ##############################
# # ARIADM-768 (legalRepDetails)
# # Populated for legally represented (LR) cases, NULL otherwise (e.g. AIP)
# ##############################
for _lr_field in ("legalRepGivenName", "legalRepFamilyNamePaperJ", "legalRepCompanyPaperJ"):
    checks[f"valid_{_lr_field}_not_null"] = (
        f"((dv_representation = 'LR' AND {_lr_field} IS NOT NULL) "
        f"OR (dv_representation != 'LR' AND {_lr_field} IS NULL))"
    )


# ##############################
# # ARIADM-756 (appellantDetails)
# ##############################
for _name_field in (
    "appellantFamilyName",
    "appellantGivenNames",
    "appellantFullName",
    "appellantNameForDisplay",
):
    checks[f"valid_{_name_field}_not_null"] = f"({_name_field} IS NOT NULL)"

# Date of birth must look like yyyy-MM-dd
checks["valid_appellantDateOfBirth_format"] = "(appellantDateOfBirth RLIKE " + "r'^\\d{4}-\\d{2}-\\d{2}$'" + ")"

for _name_field in ("caseNameHmctsInternal", "hmctsCaseNameInternal"):
    checks[f"valid_{_name_field}_not_null"] = f"({_name_field} IS NOT NULL)"

# SQL raw-string literal for the e-mail shape; kept out of the f-string so its braces stay literal.
_LR_EMAIL_LITERAL = "r'^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$'"

checks.update({
    # ##############################
    # # ARIADM-771 (AppealType - legalRepDetails)
    # # LR cases need a well-formed e-mail; every other case must have none.
    # ##############################
    "valid_legalrepEmail_not_null": (
        f"((dv_representation = 'LR' AND legalRepEmail RLIKE {_LR_EMAIL_LITERAL}) "
        "OR (dv_representation != 'LR' AND legalRepEmail IS NULL))"
    ),
    # ##############################
    # # ARIADM-758 (appellantDetails)
    # # isAppellantMinor is mandatory; the other three flags may be NULL.
    # ##############################
    "valid_isAppellantMinor_yes_no": "(isAppellantMinor IS NOT NULL AND isAppellantMinor IN ('Yes', 'No'))",
    "valid_deportationOrderOptions_yes_no": "(deportationOrderOptions IS NULL OR deportationOrderOptions IN ('Yes', 'No'))",
    "valid_appellantInUk_yes_no": "(appellantInUk IS  NULL OR appellantInUk IN ('Yes', 'No'))",
    "valid_appealOutOfCountry_yes_no": "(appealOutOfCountry IS  NULL OR appealOutOfCountry IN ('Yes', 'No'))",
})

# ##############################
# # ARIADM-769 (legalRepDetails - address logic, including CaseRep_Address5)
# # Each check expects the field to be populated for LR cases and NULL for all other cases.
# ##############################

checks["valid_legalRepHasAddress_yes_no"] = ( #Omit non-LR records. NLE data will fail all expectations (55) as address are non-UK
  "((dv_representation = 'LR' AND legalRepHasAddress IS NOT NULL AND legalRepHasAddress = 'Yes') OR (dv_representation != 'LR' AND legalRepHasAddress IS NULL))"
)
# NOTE(review): the per-field clauses below are combined with OR, so one satisfied clause
# (e.g. a valid oocAddressLine1) satisfies the whole check regardless of the other lengths.
# Confirm whether AND was intended; a plain switch to AND would also need NULL handling
# for the optional lines, because LEN(NULL) < n evaluates to NULL.
checks["valid_legalRepHasAddressUK"]   = ( #Omit non-LR records. All fields are null, hence all expectations will fail. (55)
  "(((dv_representation = 'LR' AND oocAddressLine1 IS NOT NULL AND LEN(oocAddressLine1) < 151) OR (dv_representation != 'LR' AND oocAddressLine1 IS NULL)"
  "OR ((dv_representation = 'LR' AND LEN(oocAddressLine2) < 51) OR (dv_representation != 'LR' AND oocAddressLine2 IS NULL))" 
  "OR ((dv_representation = 'LR' AND LEN(oocAddressLine3) < 51) OR (dv_representation != 'LR' AND oocAddressLine3 IS NULL))"
  "OR ((dv_representation = 'LR' AND LEN(oocAddressLine4) < 51) OR (dv_representation != 'LR' AND oocAddressLine4 IS NULL))"
  "OR ((dv_representation = 'LR' AND LEN(CaseRep_Address5) < 51) OR (dv_representation != 'LR' AND CaseRep_Address5 IS NULL))"
  "OR ((dv_representation = 'LR' AND LEN(CaseRep_Postcode) < 15) OR (dv_representation != 'LR' AND CaseRep_Postcode IS NULL))))"
)   
checks["valid_oocAddressLine1"] = ( #Omit non-LR records. NLE data will fail all expectations (55) as fields are null
  "((dv_representation = 'LR' AND oocAddressLine1 IS NOT NULL) OR (dv_representation != 'LR' AND oocAddressLine1 IS NULL))"
)
checks["valid_oocAddressLine2"] = ( #Omit non-LR records. NLE data will fail all expectations (55) as fields are null
  "((dv_representation = 'LR' AND oocAddressLine2 IS NOT NULL) OR (dv_representation != 'LR' AND oocAddressLine2 IS NULL))"
)
checks["valid_oocAddressLine3"] = ( #Omit non-LR records. NLE data will fail all expectations (55) as fields are null
  "((dv_representation = 'LR' AND oocAddressLine3 IS NOT NULL) OR (dv_representation != 'LR' AND oocAddressLine3 IS NULL))"
)
checks["valid_oocAddressLine4"] = ( #Omit non-LR records. NLE data will fail all expectations (55) as fields are null
  "((dv_representation = 'LR' AND oocAddressLine4 IS NOT NULL) OR (dv_representation != 'LR' AND oocAddressLine4 IS NULL))"
)
# NOTE(review): this expression reads a column called valid_countryGovUkOocAdminJ, which is
# also the name of an expectation key in this dictionary - confirm the column really exists.
checks["valid_oocrCountryGovUkAdminJ"] = ( #Omit non-LR records. NLE data will fail all expectations (55) as fields are null
  "((dv_representation = 'LR' AND CaseRep_Address5 IS NOT NULL AND valid_countryGovUkOocAdminJ IS NOT NULL ) OR (dv_representation != 'LR' AND CaseRep_Address5 IS NULL))"
  )

# ##############################
# # ARIADM-766 (appellantStateless)
# ##############################
checks.update({
    # Only the two enumerated values are accepted
    "valid_appellantStateless_values": "(appellantStateless IN ('isStateless', 'hasNationality'))",
    "valid_appellantNationalitiesDescription_not_null": "(appellantNationalitiesDescription IS NOT NULL)",
    "valid_appellantNationalities_not_null": "(appellantNationalities IS NOT NULL)",
})

##############################
# ARIADM-760 (appellantDetails) - appellantHasFixedAddress and appellantAddress
# Every rule below applies only when the category list contains 37;
# rows without category 37 pass unconditionally.
##############################
_HAS_CAT_37 = "array_contains(valid_categoryIdList, 37)"

# Must be 'Yes' or 'No' for category 37
checks["valid_appellantHasFixedAddress_yes_no_if_cat37"] = (
    f"( ({_HAS_CAT_37} AND appellantHasFixedAddress IS NOT NULL AND appellantHasFixedAddress IN ('Yes', 'No')) OR (NOT {_HAS_CAT_37}) )"
)

# AddressLine1 is mandatory and limited to 150 characters
checks["valid_appellantAddress_AddressLine1_mandatory_and_length"] = (
    f"( ({_HAS_CAT_37} AND appellantAddress.AddressLine1 IS NOT NULL AND LENGTH(appellantAddress.AddressLine1) <= 150) OR (NOT {_HAS_CAT_37}) )"
)

# The remaining address parts are optional but length-limited
for _part, _max_len in (
    ("AddressLine2", 50),
    ("AddressLine3", 50),
    ("PostTown", 50),
    ("County", 50),
    ("PostCode", 14),
    ("Country", 50),
):
    checks[f"valid_appellantAddress_{_part}_length"] = (
        f"( ({_HAS_CAT_37} AND (appellantAddress.{_part} IS NULL OR LENGTH(appellantAddress.{_part}) <= {_max_len})) OR (NOT {_HAS_CAT_37}) )"
    )


checks.update({
    # #############################
    # # ARIADM-709 (flagsLabels): journeyType is 'aip' for AIP cases, NULL otherwise
    # #############################
    "valid_journeyType_aip_orNull": (
        "((dv_representation = 'AIP' AND journeyType = 'aip') OR (dv_representation != 'AIP' AND journeyType IS NULL))"
    ),
    # #############################
    # # ARIADM-710 (flagsLabels): fee exemption is 'Yes' for DA appeals, 'No' otherwise
    # #############################
    "valid_isAriaMigratedFeeExemption_yes_no": (
        "((dv_CCDAppealType = 'DA' AND isAriaMigratedFeeExemption = 'Yes') OR (dv_CCDAppealType != 'DA' AND isAriaMigratedFeeExemption = 'No'))"
    ),
})

# ##############################
# # ARIADM-712 (flagsLabel)- caseFlags
# # Either there are no details, or the first detail's attribute appears among the
# # same attribute collected over all details.
# ##############################
for _key_part, _attr_path in (
    ("name", "name"),
    ("pathId", "path[0].id"),
    ("flagCode", "flagCode"),
    ("flagComment", "flagComment"),
    ("hearingRelevant", "hearingRelevant"),
):
    checks[f"valid_caseFlags_{_key_part}_in_list"] = f"""
(
  caseFlags.details IS NULL OR
  ARRAY_CONTAINS(
    TRANSFORM(caseFlags.details, x -> x.value.{_attr_path}),
    caseFlags.details[0].value.{_attr_path}
  )
)
"""

# ##############################
# # ARIADM-712 (flagsLabel)- appellantLevelFlags
# # Either the first detail's attribute is NULL, or it appears among the same
# # attribute collected over all details.
# ##############################
for _key_part, _attr_path in (
    ("name", "name"),
    ("path_id", "path[0].id"),
    ("flagCode", "flagCode"),
    ("flagComment", "flagComment"),
    ("hearingRelevant", "hearingRelevant"),
):
    checks[f"valid_appellantLevelFlags_{_key_part}_in_details"] = f"""
(
  appellantLevelFlags.details[0].value.{_attr_path} IS NULL OR
  ARRAY_CONTAINS(
    TRANSFORM(appellantLevelFlags.details, x -> x.value.{_attr_path}),
    appellantLevelFlags.details[0].value.{_attr_path}
  )
)
"""

# ##############################
# # ARIADM-780 (PartyID)
# ##############################
checks.update({
    "valid_appellantPartyId_not_null": "(appellantPartyId IS NOT NULL)",
    # If appellantsRep = no then appellantsRep = LR
    "valid_legalRepIndividualPartyId_not_null": (
        "(legalRepIndividualPartyId IS NOT NULL AND appellantsRepresentation = 'No')"
    ),
    # If appellantsRep = no then appellantsRep = LR
    # NOTE(review): this key has no underscore after "valid", unlike its siblings; it is
    # left unchanged because the key is the expectation name reported by the pipeline.
    "validlegalRepOrganisationPartyId_not_null": (
        "(legalRepOrganisationPartyId IS NOT NULL AND appellantsRepresentation = 'No')"
    ),
    "valid_sponsorPartyId_not_null": "(sponsorPartyId IS NOT NULL)",
})

# ##############################
# # ARIADM-783 (payment)
# # Fee-paying appeal types (EA, EU, HU, PA) must carry each payment field;
# # every other appeal type must leave it NULL.
# ##############################
_FEE_PAYING_TYPE = "dv_CCDAppealType IN ('EA','EU','HU','PA')"
_NON_FEE_PAYING_TYPE = "dv_CCDAppealType NOT IN ('EA','EU','HU','PA')"

# (field, must_cast_to_int): amount-style fields must also be castable to INT.
for _field, _must_cast_to_int in (
    ("feeAmountGbp", True),
    ("feeDescription", False),
    ("feeWithHearing", True),
    ("feeWithoutHearing", True),
    ("paymentDescription", False),
    ("paymentStatus", False),
    ("feeVersion", False),
    ("feePaymentAppealType", False),
):
    _populated = f"({_field} IS NOT NULL)"
    if _must_cast_to_int:
        _populated += f" AND (TRY_CAST({_field} AS INT) IS NOT NULL)"
    # The whole "A OR B" is wrapped in one outer pair of parentheses. Without it, joining
    # the checks with AND regroups the expression, because AND binds tighter than OR.
    checks[f"valid_{_field}"] = (
        f"(({_FEE_PAYING_TYPE} AND {_populated}) "
        f"OR ({_NON_FEE_PAYING_TYPE} AND ({_field} IS NULL)))"
    )

# ##############################
# # ARIADM-785 / ARIADM-786 (remissionTypes)
# # Fee-paying appeal types (EA, EU, HU, PA) must carry the remission fields;
# # every other appeal type must leave them NULL.
# # Each expression is wrapped in one outer pair of parentheses so its top-level OR
# # cannot be regrouped when the checks are later joined with AND.
# ##############################
_REMISSION_FEE_TYPE = "dv_CCDAppealType IN ('EA', 'EU', 'HU', 'PA')"
_REMISSION_NON_FEE_TYPE = "dv_CCDAppealType NOT IN ('EA', 'EU', 'HU', 'PA')"

# ARIADM-785: enumerated values
checks["valid_remissionType_in_list"] = (
    "((remissionType IN ('noRemission', 'hoWaiverRemission', 'helpWithFees', 'exceptionalCircumstancesRemission') "
    f"AND {_REMISSION_FEE_TYPE}) OR (remissionType IS NULL AND {_REMISSION_NON_FEE_TYPE}))"
)

checks["valid_remissionClaim_in_list"] = (
    "((remissionClaim IN ('asylumSupport', 'legalAid', 'section17', 'section20', 'homeOfficeWaiver') "
    f"AND {_REMISSION_FEE_TYPE}) OR (remissionClaim IS NULL AND {_REMISSION_NON_FEE_TYPE}))"
)

# ARIADM-785 (feeRemissionType) and ARIADM-786 (the rest): presence checks
for _remission_field in (
    "feeRemissionType",
    "exceptionalCircumstances",
    "helpWithFeesReferenceNumber",
    "legalAidAccountNumber",
    "asylumSupportReference",
):
    checks[f"valid_{_remission_field}_not_null"] = (
        f"(({_remission_field} IS NOT NULL AND {_REMISSION_FEE_TYPE}) "
        f"OR ({_REMISSION_NON_FEE_TYPE} AND {_remission_field} IS NULL))"
    )

checks.update({
    ##############################
    # ARIADM-773 (SponsorDetails)
    ##############################
    "valid_hasSponsor_yes_no": "(hasSponsor IS NOT NULL AND hasSponsor IN ('Yes', 'No'))",
    "valid_sponsorGivenNames_not_null": (
        "(((array_contains(valid_categoryIdList, 38)) AND hasSponsor = 'Yes' AND sponsorGivenNames IS NOT NULL) OR (NOT array_contains(valid_categoryIdList, 38) AND hasSponsor = 'No' AND sponsorGivenNames IS NULL))"
    ),
    "valid_sponsorFamilyName_not_null": (
        "(((array_contains(valid_categoryIdList, 38) AND hasSponsor = 'Yes' AND sponsorFamilyName IS NOT NULL) OR (NOT array_contains(valid_categoryIdList, 38) AND hasSponsor = 'No' AND sponsorFamilyName IS NULL)))"
    ),
    # NOTE(review): unlike its siblings this check has no branch for rows without a
    # sponsor, so such rows do not satisfy it - confirm that is intended.
    "valid_sponsorAuthorisation_yes_no": (
        "((array_contains(valid_categoryIdList, 38) AND hasSponsor = 'Yes' AND sponsorAuthorisation IN ('Yes', 'No')))"
    ),
    ##############################
    # ARIADM-776 (SponsorDetails)
    ##############################
    "valid_sponsorAddress_not_null": (
        "(((array_contains(valid_categoryIdList, 38) AND hasSponsor = 'Yes' AND sponsorAddress IS NOT NULL) OR (NOT array_contains(valid_categoryIdList, 38) AND hasSponsor = 'No' AND sponsorAddress IS NULL)))"
    ),
    ##############################
    # ARIADM-778 (SponsorDetails)
    ##############################
    "valid_sponsorEmailAdminJ": (
        "(((array_contains(valid_categoryIdList, 38) AND hasSponsor = 'Yes' AND sponsorEmailAdminJ IS NOT NULL) "
        "OR (NOT array_contains(valid_categoryIdList, 38) OR hasSponsor = 'No') AND sponsorEmailAdminJ IS NULL))"
    ),
    "valid_sponsorMobileNumberAdminJ": (
        "(((array_contains(valid_categoryIdList, 38) AND hasSponsor = 'Yes' AND sponsorMobileNumberAdminJ IS NOT NULL) "
        "OR (NOT array_contains(valid_categoryIdList, 38) OR hasSponsor = 'No') AND sponsorMobileNumberAdminJ IS NULL))"
    ),
})
# ##############################
# ARIADM-760 (appellantDetails)
# ARIADM-762 (appellantDetails)
# ##############################
checks.update({
    # Category 38 or MainRespondentId 4: one of the enumerated values; NULL is always accepted
    "valid_oocAppealAdminJ_values": (
        "( ( (array_contains(valid_categoryIdList, 38) OR MainRespondentId = 4) "
        "AND oocAppealAdminJ IN ('entryClearanceDecision', 'leaveUk', 'none') ) "
        "OR (oocAppealAdminJ IS NULL) )"
    ),
    # Only IF CategoryId IN [38] = Include; ELSE null
    "valid_appellantHasFixedAddressAdminJ": (
        "( (array_contains(valid_categoryIdList, 38) AND appellantHasFixedAddressAdminJ IN ('Yes', 'No')) "
        "OR (NOT array_contains(valid_categoryIdList, 38) AND appellantHasFixedAddressAdminJ IS NULL) )"
    ),
    # addressLine1AdminJ: populated when category 38 and at least one source address field is present; NULL is accepted
    "valid_addressLine1AdminJ": (
        "( (array_contains(valid_categoryIdList, 38) AND "
        "(Appellant_Address1 IS NOT NULL OR Appellant_Address2 IS NOT NULL OR Appellant_Address3 IS NOT NULL OR Appellant_Address4 IS NOT NULL OR Appellant_Address5 IS NOT NULL OR Appellant_Postcode IS NOT NULL) "
        "AND addressLine1AdminJ IS NOT NULL) "
        "OR (addressLine1AdminJ IS NULL) )"
    ),
    # addressLine2AdminJ: as above, but also requires dv_representation = 'LR' and ignores Appellant_Address1
    "valid_addressLine2AdminJ": (
        "( (array_contains(valid_categoryIdList, 38) AND dv_representation = 'LR' AND "
        "(Appellant_Address2 IS NOT NULL OR Appellant_Address3 IS NOT NULL OR Appellant_Address4 IS NOT NULL OR Appellant_Address5 IS NOT NULL OR Appellant_Postcode IS NOT NULL) "
        "AND addressLine2AdminJ IS NOT NULL) "
        "OR (addressLine2AdminJ IS NULL) )"
    ),
    # addressLine3AdminJ: populated when category 38 and Appellant_Address3 or Appellant_Address4 is present; NULL is accepted
    "valid_addressLine3AdminJ": (
        "( (array_contains(valid_categoryIdList, 38) AND "
        "(Appellant_Address3 IS NOT NULL OR Appellant_Address4 IS NOT NULL) "
        "AND addressLine3AdminJ IS NOT NULL) "
        "OR ( addressLine3AdminJ IS NULL) )"
    ),
    # addressLine4AdminJ: populated when category 38 and Appellant_Address5 or Appellant_Postcode is present; NULL is accepted
    "valid_addressLine4AdminJ": (
        "( (array_contains(valid_categoryIdList, 38) AND "
        "(Appellant_Address5 IS NOT NULL OR Appellant_Postcode IS NOT NULL) "
        "AND addressLine4AdminJ IS NOT NULL) "
        "OR ( addressLine4AdminJ IS NULL) )"
    ),
    # countryGovUkOocAdminJ: populated when category 38; NULL is accepted
    "valid_countryGovUkOocAdminJ": (
        "( (array_contains(valid_categoryIdList, 38) AND countryGovUkOocAdminJ IS NOT NULL) "
        "OR (countryGovUkOocAdminJ IS NULL) )"
    ),
})
##############################
# ARIADM-764 (appellantDetails)
# E-mail and mobile number are optional, but must match the pattern when present.
##############################
# SQL raw-string literals; kept out of the f-strings so their braces stay literal.
# e-mail: ^([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})$
_EMAIL_LITERAL = "r'^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$'"
# phone: ^(?=(?:\D*\d){7,15}\D*$)\+?(\d[\d-. ]+)?(\([\d-. ]+\))?[\d-. ]*\d$
_PHONE_LITERAL = "r'^(?=(?:\\D*\\d){7,15}\\D*$)\\+?(\\d[\\d-. ]+)?(\\([\\d-. ]+\\))?[\\d-. ]*\\d$'"

checks.update({
    "valid_internalAppellantEmail_format": (
        f"( internalAppellantEmail RLIKE {_EMAIL_LITERAL} OR internalAppellantEmail IS NULL)"
    ),
    "valid_email_format": f"(email RLIKE {_EMAIL_LITERAL} OR email IS NULL)",
    "valid_internalAppellantMobileNumber": (
        f"(internalAppellantMobileNumber RLIKE {_PHONE_LITERAL} OR internalAppellantMobileNumber IS NULL)"
    ),
    "valid_mobileNumber": f"(mobileNumber RLIKE {_PHONE_LITERAL} OR mobileNumber IS NULL)",
})
##############################
# ARIADM-778 (General)
##############################
checks.update({
    # Mandatory Yes/No flag
    "isServiceRequestTabVisibleConsideringRemissions_yes_no": (
        "(isServiceRequestTabVisibleConsideringRemissions IS NOT NULL AND isServiceRequestTabVisibleConsideringRemissions IN ('Yes', 'No'))"
    ),
    # Mandatory, restricted to the fixed hearing-centre list
    "lu_applicationChangeDesignatedHearingCentre_fixed_list": (
        "(lu_applicationChangeDesignatedHearingCentre IS NOT NULL AND lu_applicationChangeDesignatedHearingCentre IN ('taylorHouse', 'newport', 'newcastle', 'manchester', 'hattonCross' ,'glasgow' ,'bradford' ,'birmingham', 'arnhemHouse', 'crownHouse', 'harmondsworth', 'yarlsWood', 'remoteHearing', 'decisionWithoutHearing'))"
    ),
})
#########################################
# ARIADM-788 and ARIADM-792 (homeOffice)
#########################################
# Mandatory dates that must look like yyyy-MM-dd
_HO_DATE_LITERAL = "r'^\\d{4}-\\d{2}-\\d{2}$'"
for _ho_date in (
    "homeOfficeDecisionDate",
    "decisionLetterReceivedDate",
    "dateEntryClearanceDecision",
):
    checks[f"valid_{_ho_date}_format"] = (
        f"({_ho_date} IS NOT NULL AND {_ho_date} RLIKE {_HO_DATE_LITERAL})"
    )

# Mandatory reference numbers
for _ho_ref in ("homeOfficeReferenceNumber", "gwfReferenceNumber"):
    checks[f"valid_{_ho_ref}_not_null"] = f"({_ho_ref} IS NOT NULL)"

#########################################
# ARIADM-799 (Documents)
# Every document collection must be present.
#########################################
for _doc_field in (
    "uploadTheAppealFormDocs",
    "caseNotes",
    "tribunalDocuments",
    "legalRepresentativeDocuments",
):
    checks[f"valid_{_doc_field}"] = f"({_doc_field} IS NOT NULL)"

In [0]:
# Single predicate that is true only when every check passes.
# Each rule is parenthesised before joining: several checks are written as
# "(...) OR (...)" with no enclosing parentheses, and since AND binds tighter than OR
# a bare join would regroup them together with their neighbours.
dq_rules = "({0})".format(" AND ".join(f"({rule})" for rule in checks.values()))

In [0]:
import dlt
from pyspark.sql.functions import col, lit, expr

@dlt.table(
    name=f"stg_main_{output_name}_validation",
    comment="DLT table running mainPaymentPending to generate a JSON_Content column for CCD validation. Applies DLT expectations on CCD, adding is_valid to flag validation results.",
    path=f"{audit_path}/stg_main_{output_name}_validation"
)
@dlt.expect_all(checks)
def stg_main_payment_pending_validation():
    """
    Build the staging validation table for the current ``AppealState``.

    Steps:
      1. Load the silver/bronze inputs through ``dlt.read``; if any of those
         reads raises, load all of them from the ``ariadm_active_appeals``
         schema with ``spark.table`` instead.
      2. Run ``mainPaymentPending`` to produce the main output DataFrame.
      3. Left-join source columns from the case, applicant, country lookup and
         category tables (presumably the columns referenced by the rules in
         ``checks`` - TODO confirm against the full rule list).
      4. Add ``is_valid``: the result of evaluating ``dq_rules`` per row.

    Returns:
        DataFrame: the ``mainPaymentPending`` output plus the joined helper
        columns and the ``is_valid`` flag.
    """
    # NOTE: some inputs below are read but not used further. They are kept
    # because a failure in any of these reads is what triggers the fallback.
    try:
        silver_m1 = dlt.read("silver_appealcase_detail").filter(col("dv_targetState") == lit(AppealState)).distinct()
        bronze_appealtype_lookup_df = dlt.read("bronze_appealtype").distinct()
        bronze_hearing_centres_lookup_df = dlt.read("bronze_hearing_centres").distinct()
        silver_m2 = dlt.read("silver_caseapplicant_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_m3 = dlt.read("silver_status_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_m4 = dlt.read("silver_transaction_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_m5 = dlt.read("silver_link_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_m6 = dlt.read("silver_adjudicator_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_c = dlt.read("silver_appealcategory_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_h = dlt.read("silver_history_detail").filter(col("dv_targetState") == lit(AppealState))
        bronze_countries_postal_lookup_df = dlt.read("bronze_countries_postal").distinct()
        bronze_remissions_lookup_df = dlt.read("bronze_remissions").distinct()
        bronze_countryFromAddress = dlt.read("bronze_countries_countryFromAddress")
        bronze_HORef_cleansing = dlt.read("bronze_HORef_cleansing")
        bronze_hearing_centres = dlt.read("bronze_hearing_centres")
        bronze_derive_hearing_centres = dlt.read("bronze_derive_hearing_centres")
    except Exception:
        # Fallback: read the same inputs from the metastore schema.
        silver_m1 = spark.table("ariadm_active_appeals.silver_appealcase_detail").filter(col("dv_targetState") == lit(AppealState)).distinct()
        bronze_appealtype_lookup_df = spark.table("ariadm_active_appeals.bronze_appealtype").distinct()
        bronze_hearing_centres_lookup_df = spark.table("ariadm_active_appeals.bronze_hearing_centres").distinct()
        silver_m2 = spark.table("ariadm_active_appeals.silver_caseapplicant_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_m3 = spark.table("ariadm_active_appeals.silver_status_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_m4 = spark.table("ariadm_active_appeals.silver_transaction_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_m5 = spark.table("ariadm_active_appeals.silver_link_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_m6 = spark.table("ariadm_active_appeals.silver_adjudicator_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_c = spark.table("ariadm_active_appeals.silver_appealcategory_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_h = spark.table("ariadm_active_appeals.silver_history_detail").filter(col("dv_targetState") == lit(AppealState))
        bronze_countries_postal_lookup_df = spark.table("ariadm_active_appeals.bronze_countries_postal").distinct()
        # Same variable name as in the try branch, so the call below works on both paths.
        bronze_remissions_lookup_df = spark.table("ariadm_active_appeals.bronze_remissions").distinct()
        bronze_countryFromAddress = spark.table("ariadm_active_appeals.bronze_countries_countryFromAddress")
        bronze_HORef_cleansing = spark.table("ariadm_active_appeals.bronze_HORef_cleansing")
        bronze_hearing_centres = spark.table("ariadm_active_appeals.bronze_hearing_centres")
        bronze_derive_hearing_centres = spark.table("ariadm_active_appeals.bronze_derive_hearing_centres")

    # Only the main output is needed here; the audit output is discarded.
    df_final, _ = mainPaymentPending(silver_m1, silver_m2, silver_m3, silver_c, silver_h, bronze_remissions_lookup_df, bronze_countryFromAddress, bronze_HORef_cleansing, bronze_hearing_centres, bronze_derive_hearing_centres)

    # Source columns joined back onto the main output before the rules are evaluated.
    valid_representation = silver_m1.select(col("CaseNo"), col("dv_representation"), col("dv_CCDAppealType"), col("CaseRep_Address5"), col("CaseRep_Postcode"), col("MainRespondentId"), col("lu_appealType"))
    valid_appealant_address = silver_m2.select(col("CaseNo"), col("Appellant_Address1"), col("Appellant_Address2"), col("Appellant_Address3"), col("Appellant_Address4"), col("Appellant_Address5"), col("Appellant_Postcode"), col("Appellant_Email"), col("Appellant_Telephone"))
    valid_country_list = bronze_countries_postal_lookup_df.select(col("countryGovUkOocAdminJ").alias("valid_countryGovUkOocAdminJ")).distinct()
    # One row per case holding the list of its category ids.
    valid_catagoryid_list = silver_c.groupBy("CaseNo").agg(F.collect_list("CategoryId").alias("valid_categoryIdList"))

    df_final = df_final.join(valid_representation, on="CaseNo", how="left"
                            ).join(valid_country_list, on=col("CaseRep_Address5") == col("valid_countryGovUkOocAdminJ"), how="left"
                            ).join(valid_catagoryid_list, on="CaseNo", how="left"
                            ).join(valid_appealant_address, on="CaseNo", how="left"
                            )

    # Row-level flag: the AND of every rule in `checks`.
    df_final = df_final.withColumn("is_valid", expr(dq_rules))

    return df_final

In [0]:
@dlt.table(
    name=f"stg_valid_{output_name}_records",
    comment="Delta Live Gold Table with JSON content.",
    path=f"{audit_path}/stg_valid_{output_name}_records"
)
def stg_valid_payment_pending_records():
    """
    Upload the JSON payload of every record that passed validation.

    Reads the staging validation table, keeps rows whose ``is_valid`` flag is
    true, skips rows whose ``JSON_content`` starts with ``Error``, and calls
    ``upload_udf`` for each remaining row.

    Returns:
        DataFrame: ``CaseNo``, ``JSON_content``, ``File_Name`` (the original
        ``JSON_File_Name``) and the ``Status`` returned by ``upload_udf``.
    """
    validated = dlt.read(f"stg_main_{output_name}_validation")

    uploadable = (
        validated
        .filter(col("is_valid") == True)
        # Spread the rows over 64 partitions before the upload UDF runs.
        .repartition(64)
        .filter(~col("JSON_content").like("Error%"))
    )

    uploaded = uploadable.withColumn(
        "Status",
        upload_udf(col("JSON_File_Name"), col("JSON_content")),
    )

    return uploaded.select(
        "CaseNo",
        "JSON_content",
        col("JSON_File_Name").alias("File_Name"),
        "Status",
    )


In [0]:
@dlt.table(
    name=f"stg_invalid_{output_name}_quarantine_records",
    comment="Quarantined records that failed data quality checks or JSON generation.",
    path=f"{audit_path}/stg_invalid_{output_name}_quarantine_records"
)
def stg_invalid_payment_pending_quarantine_records():
    """
    Upload the JSON payload of every record that did not pass validation.

    Reads the staging validation table, keeps rows whose ``is_valid`` flag is
    false or NULL, rewrites the target file name from the ``/JSON/`` folder to
    ``/INVALID_JSON/``, skips rows whose ``JSON_content`` starts with
    ``Error``, and calls ``upload_udf`` for each remaining row.

    Returns:
        DataFrame: ``CaseNo``, ``JSON_content``, ``File_Name`` (the rewritten
        ``JSON_File_Name``) and the ``Status`` returned by ``upload_udf``.
    """
    df = dlt.read(f"stg_main_{output_name}_validation")

    # A NULL flag means at least one rule could not be evaluated. `!= True` on
    # its own yields NULL for such rows, which would drop them here while the
    # valid-records table (`== True`) also excludes them. Treat NULL as not valid.
    not_valid = col("is_valid").isNull() | (col("is_valid") != True)

    df_filtered = df.filter(not_valid).withColumn(
        "JSON_File_Name", regexp_replace(col("JSON_File_Name"), "/JSON/", "/INVALID_JSON/")
    )

    # Repartition to optimize parallelism
    repartitioned_df = df_filtered.repartition(64)

    df_with_upload_status = repartitioned_df.filter(~col("JSON_content").like("Error%")).withColumn(
            "Status", upload_udf(col("JSON_File_Name"), col("JSON_content"))
        )

    return df_with_upload_status.select("CaseNo", "JSON_content", col("JSON_File_Name").alias("File_Name"), "Status")


In [0]:
import dlt
from pyspark.sql.functions import col, lit, expr

@dlt.table(
    name=f"apl_active_{output_name}_cr_audit_table",
    comment="DLT table Covers 4.2 Silver layer LLD requirements: Audits CCD attributes, input field values, derived values, and all columns for validation and traceability.",
    path=f"{audit_path}/apl_active_{output_name}_cr_audit_table"
)
def apl_active_payment_pending_cr_audit_table():
    """
    Build the audit table for the current ``AppealState``.

    Loads the silver/bronze inputs through ``dlt.read``; if any of those reads
    raises, loads all of them from the ``ariadm_active_appeals`` schema with
    ``spark.table`` instead. Then runs ``mainPaymentPending`` and returns its
    second (audit) output.

    Returns:
        DataFrame: the audit DataFrame produced by ``mainPaymentPending``.
    """
    # NOTE: some inputs below are read but not used further. They are kept
    # because a failure in any of these reads is what triggers the fallback.
    try:
        silver_m1 = dlt.read("silver_appealcase_detail").filter(col("dv_targetState") == lit(AppealState)).distinct()
        silver_m2 = dlt.read("silver_caseapplicant_detail").filter(col("dv_targetState") == lit(AppealState))
        bronze_appealtype_lookup_df = dlt.read("bronze_appealtype").distinct()
        bronze_hearing_centres_lookup_df = dlt.read("bronze_hearing_centres").distinct()
        silver_m3 = dlt.read("silver_status_detail").filter(col("dv_targetState") == lit(AppealState))
        # Pipeline datasets are read by their unqualified name, like every other input here.
        silver_c = dlt.read("silver_appealcategory_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_h = dlt.read("silver_history_detail").filter(col("dv_targetState") == lit(AppealState))
        bronze_remissions_lookup_df = dlt.read("bronze_remissions").distinct()
        bronze_countryFromAddress = dlt.read("bronze_countries_countryFromAddress")
        bronze_HORef_cleansing = dlt.read("bronze_HORef_cleansing")
        bronze_hearing_centres = dlt.read("bronze_hearing_centres")
        bronze_derive_hearing_centres = dlt.read("bronze_derive_hearing_centres")
    except Exception:
        # Fallback: read the same inputs from the metastore schema.
        silver_m1 = spark.table("ariadm_active_appeals.silver_appealcase_detail").filter(col("dv_targetState") == lit(AppealState)).distinct()
        silver_m2 = spark.table("ariadm_active_appeals.silver_caseapplicant_detail").filter(col("dv_targetState") == lit(AppealState))
        bronze_appealtype_lookup_df = spark.table("ariadm_active_appeals.bronze_appealtype").distinct()
        bronze_hearing_centres_lookup_df = spark.table("ariadm_active_appeals.bronze_hearing_centres").distinct()
        silver_m3 = spark.table("ariadm_active_appeals.silver_status_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_c = spark.table("ariadm_active_appeals.silver_appealcategory_detail").filter(col("dv_targetState") == lit(AppealState))
        silver_h = spark.table("ariadm_active_appeals.silver_history_detail").filter(col("dv_targetState") == lit(AppealState))
        bronze_remissions_lookup_df = spark.table("ariadm_active_appeals.bronze_remissions").distinct()
        bronze_countryFromAddress = spark.table("ariadm_active_appeals.bronze_countries_countryFromAddress")
        bronze_HORef_cleansing = spark.table("ariadm_active_appeals.bronze_HORef_cleansing")
        bronze_hearing_centres = spark.table("ariadm_active_appeals.bronze_hearing_centres")
        bronze_derive_hearing_centres = spark.table("ariadm_active_appeals.bronze_derive_hearing_centres")

    # Only the audit output is needed here; the main output is discarded.
    _, df_audit = mainPaymentPending(silver_m1, silver_m2, silver_m3, silver_c, silver_h, bronze_remissions_lookup_df, bronze_countryFromAddress, bronze_HORef_cleansing, bronze_hearing_centres, bronze_derive_hearing_centres)

    return df_audit

In [0]:
# Final cell: exit the notebook, passing a success message as the exit value.
dbutils.notebook.exit("Notebook completed successfully")