In [0]:
# Load config and set up environment variables.
# Resolves the environment name / landing-zone key from the shared config file,
# pulls the service principal credentials from the matching secret scope, and
# derives the storage account names used by the rest of the notebook.
state_under_test = "paymentPending"
from pyspark.sql import functions as F


config = spark.read.option("multiline", "true").json("dbfs:/configs/config.json")

# Fetch the first config row once: every .first() call is a separate Spark action.
_config_row = config.first()
if _config_row is None:
    raise ValueError("dbfs:/configs/config.json produced no rows; cannot resolve 'env' / 'lz_key'")
env_name = _config_row["env"].strip().lower()
lz_key = _config_row["lz_key"].strip().lower()

# print(f"env_code: {lz_key}")  # This won't be redacted
# print(f"env_name: {env_name}")  # This won't be redacted

# Secret scope name is derived from the landing-zone key and environment.
KeyVault_name = f"ingest{lz_key}-meta002-{env_name}"
# print(f"KeyVault_name: {KeyVault_name}")

# Service principal credentials
client_id = dbutils.secrets.get(KeyVault_name, "SERVICE-PRINCIPLE-CLIENT-ID")
client_secret = dbutils.secrets.get(KeyVault_name, "SERVICE-PRINCIPLE-CLIENT-SECRET")
tenant_id = dbutils.secrets.get(KeyVault_name, "SERVICE-PRINCIPLE-TENANT-ID")

# Storage account names
curated_storage = f"ingest{lz_key}curated{env_name}"
checkpoint_storage = f"ingest{lz_key}xcutting{env_name}"
raw_storage = f"ingest{lz_key}raw{env_name}"
landing_storage = f"ingest{lz_key}landing{env_name}"
external_storage = f"ingest{lz_key}external{env_name}"
  
# Spark config: OAuth client-credentials access for every storage account used
# by this notebook (curated, checkpoint/xcutting, raw, landing and external).
# The same five settings are applied per account, all using the one service
# principal read from the secret scope above.
_oauth_token_endpoint = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"

for _storage_account in (
    curated_storage,
    checkpoint_storage,
    raw_storage,
    landing_storage,
    external_storage,
):
    _account_host = f"{_storage_account}.dfs.core.windows.net"
    spark.conf.set(f"fs.azure.account.auth.type.{_account_host}", "OAuth")
    spark.conf.set(
        f"fs.azure.account.oauth.provider.type.{_account_host}",
        "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    )
    spark.conf.set(f"fs.azure.account.oauth2.client.id.{_account_host}", client_id)
    spark.conf.set(f"fs.azure.account.oauth2.client.secret.{_account_host}", client_secret)
    spark.conf.set(f"fs.azure.account.oauth2.client.endpoint.{_account_host}", _oauth_token_endpoint)
  
# Base locations for use in subsequent cells.
# All four live in the curated storage account; only the container
# (bronze / silver / gold) and the trailing sub-folder differ.
_curated_host = f"ingest{lz_key}curated{env_name}.dfs.core.windows.net"
_appeals_root = "ARIADM/ACTIVE/CCD/APPEALS"

bronze_path = f"abfss://bronze@{_curated_host}/{_appeals_root}/"
silver_path = f"abfss://silver@{_curated_host}/{_appeals_root}/"
# Audit tables sit in the silver container, under a per-state AUDIT folder.
audit_path = f"abfss://silver@{_curated_host}/{_appeals_root}/AUDIT/{state_under_test}"
gold_path = f"abfss://gold@{_curated_host}/{_appeals_root}/{state_under_test}"
 
 
# Print all variables
# variables = {
#     # "read_hive": read_hive,
    
#     "bronze_path": bronze_path,
#     "silver_path": silver_path,
#     "audit_path": audit_path,
#     "gold_path": gold_path,
#     "key_vault": KeyVault_name,
#     "AppealState": state_under_test
 
# }
 
# display(variables)

import json

# Get the latest JSON folder under the gold path for the state under test.
# NOTE(review): this takes the LAST entry returned by dbutils.fs.ls, which
# assumes the listing order puts the most recent folder last - confirm.
_gold_entries = dbutils.fs.ls(f"{gold_path}/")
if not _gold_entries:
    # Fail with a clear message instead of a bare IndexError on [-1].
    raise FileNotFoundError(f"No folders found under {gold_path}/")
json_location = _gold_entries[-1]
latest_json_location = json_location.name
# Listing the chosen folder raises early if it cannot be read; the result is not used.
dbutils.fs.ls(f"{gold_path}/{latest_json_location}")

# Set paths.
# Each name below initially holds a storage PATH string; the load step that
# follows rebinds most of them to DataFrames read from those paths.
try:
    #json_path = f"{gold_path}/{latest_json_location}/JSON/"
    json_path = f"{gold_path}/{latest_json_location}/INVALID_JSON/"
    M1_silver = f"{silver_path}/silver_appealcase_detail"
    M1_bronze = f"{bronze_path}/bronze_appealcase_crep_rep_floc_cspon_cfs"
    M2_silver = f"{silver_path}/silver_caseapplicant_detail"
    M3_silver = f"{silver_path}/silver_status_detail"
    C = f"{silver_path}/silver_appealcategory_detail"
    bhc = f"{bronze_path}/bronze_hearing_centres"
    bat = f"{bronze_path}/bronze_appealtype"
    docsr = f"{bronze_path}/bronze_documentsreceived"
    apl_audit = f"{audit_path}/apl_active_payment_pending_cr_audit_table/"
    sh = f"{silver_path}/silver_history_detail"
except Exception as e:
    # The original bare `except:` referenced an unbound `e`, so the handler
    # itself raised NameError and hid the real failure. Bind the exception,
    # report it, then re-raise so later cells do not run with missing paths.
    print(f"Error during fetch: {str(e)}")
    raise

# Create and load DataFrames.
# The path variables set above are rebound to the DataFrames read from them
# (except `sh`, whose DataFrame is stored under `sh_audit`).
def _load_delta(location):
    """Return a DataFrame over the Delta table stored at *location*."""
    return spark.read.format("delta").load(location)


json_data = spark.read.format("json").load(json_path)
M1_silver = _load_delta(M1_silver)
M1_bronze = _load_delta(M1_bronze)
M2_silver = _load_delta(M2_silver)
M3_silver = _load_delta(M3_silver)
C = _load_delta(C)
bhc = _load_delta(bhc)
bat = _load_delta(bat)
docsr = _load_delta(docsr)
apl_audit = _load_delta(apl_audit)
sh_audit = _load_delta(sh)

#Can be removed later, added to allow developing of code in this notebook to begin with before moving to func files
from pyspark.sql.functions import (
    col, when, lit, array, struct, collect_list, 
    max as spark_max, date_format, row_number, expr, 
    size, udf, coalesce, concat_ws, concat, trim, year, split, datediff,
    collect_set, current_timestamp,transform, first, array_contains
)

In [0]:
###############################
#UPDATE BRONZE DATA SCRIPT FOR PAYMENT PENDING.
#
#NOTE: The code below updates bronze data that would not pass the DQ expectation checks due to
#issues in the data that will be resolved before live; the updates are needed to get all the data
#through the checks and sent to CCD in the meantime
###############################
from pyspark.sql.functions import *
from delta.tables import DeltaTable

#################
# BirthDate / appellantDateOfBirth
# Overwrites BirthDate with a fixed value for the listed cases and displays
# the affected rows before and after the update.

_birthdate_cases = ("HU/00278/2025", "HU/00455/2025", "HU/00472/2025")
_birthdate_match = col("CaseNo").isin(*_birthdate_cases)

bronze_table = DeltaTable.forName(spark, "ariadm_active_appeals.bronze_appealcase_caseappellant_appellant")

# Before
display(bronze_table.toDF().filter(_birthdate_match).select("CaseNo", "BirthDate"))

bronze_table.update(
    condition=_birthdate_match,
    set={"BirthDate": lit("2000-02-01T00:00:00Z")},
)

# After
display(bronze_table.toDF().filter(_birthdate_match).select("CaseNo", "BirthDate"))


#################
# valid_appellantNationalitiesDescription_not_null
# and
# valid_appellantNationalities_not_null
# NationalityId
# Where no mapping required for 201/203
# Sets NationalityId to "41" for the listed cases and displays the affected
# rows before and after the update.

_nationality_cases = ("HU/00302/2025", "HU/00569/2025", "HU/00586/2025", "HU/00560/2025")
_nationality_match = col("CaseNo").isin(*_nationality_cases)

bronze_table = DeltaTable.forName(spark, "ariadm_active_appeals.bronze_appealcase_crep_rep_floc_cspon_cfs")

# Before
display(bronze_table.toDF().filter(_nationality_match).select("CaseNo", "NationalityId"))

bronze_table.update(
    condition=_nationality_match,
    set={"NationalityId": lit("41")},
)

# After
display(bronze_table.toDF().filter(_nationality_match).select("CaseNo", "NationalityId"))

#################
# valid_oocAddressLine1 valid_oocAddressLine2
# Changing null values to actual values: fills CaseRep_Address1..5 with fixed
# values for the listed cases (CaseRep_Postcode is displayed but not updated).

_caserep_address_cases = ("HU/00185/2025", "HU/02151/2024")
_caserep_address_match = col("CaseNo").isin(*_caserep_address_cases)
_caserep_address_columns = (
    "CaseNo",
    "CaseRep_Address1",
    "CaseRep_Address2",
    "CaseRep_Address3",
    "CaseRep_Address4",
    "CaseRep_Address5",
    "CaseRep_Postcode",
)

bronze_table = DeltaTable.forName(spark, "ariadm_active_appeals.bronze_appealcase_crep_rep_floc_cspon_cfs")

# Before
display(bronze_table.toDF().filter(_caserep_address_match).select(*_caserep_address_columns))

_caserep_address_values = {
    "CaseRep_Address1": lit("925 Lisa Plains Apt. 642X"),
    "CaseRep_Address2": lit("Hill SquareX"),
    "CaseRep_Address3": lit("LynchhavenX"),
    "CaseRep_Address4": lit("AustraliaX"),
    "CaseRep_Address5": lit("NLX"),
    # "CaseRep_Postcode" : lit("Hill SquareX"),
}
bronze_table.update(condition=_caserep_address_match, set=_caserep_address_values)

# After
display(bronze_table.toDF().filter(_caserep_address_match).select(*_caserep_address_columns))


#################
# valid_oocAddressLine1 valid_oocAddressLine2
# Sets CaseRep_Address4 to "AustraliaX" for the listed cases.
# NOTE(review): the earlier header here read "change in ooc4 ReunionX to GuamX",
# but the value written is "AustraliaX" - confirm which one is intended.

_address4_cases = ("HU/01475/2024", "HU/02191/2024")
_address4_match = col("CaseNo").isin(*_address4_cases)
_address4_columns = (
    "CaseNo",
    "CaseRep_Address1",
    "CaseRep_Address2",
    "CaseRep_Address3",
    "CaseRep_Address4",
    "CaseRep_Address5",
    "CaseRep_Postcode",
)

bronze_table = DeltaTable.forName(spark, "ariadm_active_appeals.bronze_appealcase_crep_rep_floc_cspon_cfs")

# Before
display(bronze_table.toDF().filter(_address4_match).select(*_address4_columns))

bronze_table.update(
    condition=_address4_match,
    set={"CaseRep_Address4": lit("AustraliaX")},
)

# After
display(bronze_table.toDF().filter(_address4_match).select(*_address4_columns))



################################

# Case numbers whose Appellant_Address4 is overwritten by the update that
# follows this list.
cases_to_update = ['EA/02806/2023',
'HU/00575/2025',
'HU/00581/2025',
'HU/00447/2025',
'HU/00574/2023',
'HU/00591/2025',
'EA/00588/2025',
'EA/00551/2025',
'HU/00304/2025',
'EA/00495/2025',
'EA/00560/2025',
'EA/06826/2022',
'EA/00490/2025',
'EA/00554/2025',
'EA/01778/2024',
'HU/00562/2025',
'EA/00552/2025',
'HU/00511/2025',
'EA/00483/2025',
'EA/09676/2022',
'EA/00493/2025',
'HU/00224/2025',
'EA/00496/2025',
'EA/00538/2025',
'EA/00558/2025',
'EA/08372/2022',
'HU/00822/2024',
'HU/00574/2025',
'EA/02065/2024',
'HU/00442/2025',
'EA/00586/2025',
'HU/00590/2025',
'HU/00569/2025',
'EA/00557/2025',
'HU/02346/2024',
'EA/00562/2025',
'HU/00573/2025',
'HU/00571/2025',
'EA/01893/2023',
'EA/00584/2025',
'HU/00579/2025',
'HU/00555/2025',
'HU/00583/2025',
'EA/00591/2025',
'EA/00556/2025',
'EA/00497/2025',
'HU/01972/2023',
'EA/00437/2025',
'HU/00577/2025',
'EA/00585/2025',
'HU/00252/2025',
'HU/00557/2025',
'EA/00485/2025',
'HU/00563/2025',
'HU/00278/2025',
'EA/00559/2025',
'EA/00553/2025',
'HU/00578/2025',
'HU/00445/2025',
'HU/00325/2025',
'EA/00555/2025',
'HU/00572/2025',
'HU/00582/2025',
'EA/00587/2025',
'HU/00638/2024',
'HU/00453/2025',
"EA/00289/2025"
]


# Sets Appellant_Address4 to "AustraliaX" for every case in cases_to_update,
# displaying the affected rows before and after the update.
_appellant_match = col("CaseNo").isin(cases_to_update)

bronze_table = DeltaTable.forName(spark, "ariadm_active_appeals.bronze_appealcase_caseappellant_appellant")

# Before
display(bronze_table.toDF().filter(_appellant_match).select("CaseNo", "Appellant_Address4"))

bronze_table.update(
    condition=_appellant_match,
    set={"Appellant_Address4": lit("AustraliaX")},
)

# After
display(bronze_table.toDF().filter(_appellant_match).select("CaseNo", "Appellant_Address4"))

