In [0]:
from confluent_kafka import Producer
import json
from  itertools import islice
import numpy as np
from pyspark.sql.functions import col, decode, split, element_at, udf, lit, reduce, from_json
import logging
from pyspark.sql.types import StructType, StructField, StringType
import datetime
from pyspark.sql import SparkSession, DataFrame
from pyspark import SparkContext
import os
from functools import reduce
import time

In [0]:
# --- Load configuration JSON ---
# The config file is read through Spark with the "multiline" option enabled;
# only its first row is consulted below.
config_path = "dbfs:/configs/config.json"
try:
    config = spark.read.option("multiline", "true").json(config_path)
except Exception as e:
    # Chain explicitly so the underlying Spark error stays attached as __cause__.
    raise FileNotFoundError(f"Could not read config file at {config_path}: {e}") from e

# --- Extract environment and lz_key ---
# Both values are trimmed and lower-cased because they are interpolated into
# resource names (key vault, storage accounts, event hub) further down.
try:
    first_row = config.first()
    if first_row is None:
        # DataFrame.first() returns None for an empty DataFrame; report that
        # explicitly instead of "'NoneType' object is not subscriptable".
        raise ValueError(f"config file at {config_path} contains no rows")
    env = first_row["env"].strip().lower()
    lz_key = first_row["lz_key"].strip().lower()
except Exception as e:
    raise KeyError(f"Missing expected keys 'env' or 'lz_key' in config file: {e}") from e

# --- Construct keyvault name ---
# Interpolating two str values cannot fail, so no error wrapper is needed here.
keyvault_name = f"ingest{lz_key}-meta002-{env}"

print(f"Loaded configs successfully. env={env}, lz_key={lz_key}, keyvault={keyvault_name}")


In [0]:
# Access the Service Principal secrets from the secret scope named by `keyvault_name`
def _get_service_principal_secret(key):
    """Fetch a single secret from the `keyvault_name` secret scope.

    Args:
        key: Name of the secret inside the scope.

    Returns:
        The value returned by `dbutils.secrets.get` for that key.

    Raises:
        KeyError: If the lookup fails for any reason; the original error is
            chained as the cause.
    """
    try:
        return dbutils.secrets.get(scope=keyvault_name, key=key)
    except Exception as e:
        raise KeyError(f"Could not retrieve '{key}' from Key Vault '{keyvault_name}': {e}") from e


# The secret names (including the 'PRINCIPLE' spelling) must match the scope's contents exactly.
client_secret = _get_service_principal_secret('SERVICE-PRINCIPLE-CLIENT-SECRET')
tenant_id = _get_service_principal_secret('SERVICE-PRINCIPLE-TENANT-ID')
client_id = _get_service_principal_secret('SERVICE-PRINCIPLE-CLIENT-ID')

print("✅ Successfully retrieved all Service Principal secrets.")

In [0]:
## Parameterise containers and storage accounts
curated_storage_account = f"ingest{lz_key}curated{env}"
curated_container = "gold"
silver_curated_container = "silver"

checkpoint_storage_account = f"ingest{lz_key}xcutting{env}"

## Assign OAuth (client-credentials token provider) to the curated and checkpoint storage accounts
storage_accounts = [curated_storage_account, checkpoint_storage_account]

for storage_account in storage_accounts:
    # Every ABFS setting below is keyed by the account's dfs host name.
    account_host = f"{storage_account}.dfs.core.windows.net"
    configs = {
        f"fs.azure.account.auth.type.{account_host}": "OAuth",
        f"fs.azure.account.oauth.provider.type.{account_host}":
            "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
        f"fs.azure.account.oauth2.client.id.{account_host}": client_id,
        f"fs.azure.account.oauth2.client.secret.{account_host}": client_secret,
        f"fs.azure.account.oauth2.client.endpoint.{account_host}":
            f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
    }

    for key, val in configs.items():
        try:
            spark.conf.set(key, val)
        except Exception as e:
            # Only the config key is reported, never `val`, so the client secret
            # cannot end up in the error message.
            raise RuntimeError(f"Failed to set Spark config '{key}' for storage account '{storage_account}': {e}") from e

    print(f"✅ Successfully configured OAuth for storage account: {storage_account}")

In [0]:
# Retrieve the state parameter from the Databricks Workflow
# (the widget defaults to "paymentPending" when no value is supplied).
dbutils.widgets.text("state", "paymentPending", "State to Process")
state = dbutils.widgets.get("state")
if not state or not state.strip():
    # `state` is interpolated into storage paths and Kafka options; an empty
    # value would silently produce malformed locations.
    raise ValueError("Widget 'state' must not be empty")
print(f"🔄 Processing state: {state}")

## Address the ack Event Hub which holds the messages incoming from CCD
EH_NAMESPACE = f"ingest{lz_key}-integration-eventHubNamespace001-{env}"
EH_NAME = f"evh-active-ack-{lz_key}-uks-dlrm-01"

# Same error-handling convention as the other secret lookups in this notebook.
try:
    connection_string = dbutils.secrets.get(keyvault_name, "RootManageSharedAccessKey")
except Exception as e:
    raise KeyError(f"Could not retrieve 'RootManageSharedAccessKey' from Key Vault '{keyvault_name}': {e}") from e

# Options for Spark's "kafka" source, pointed at the namespace's Kafka endpoint on port 9093.
KAFKA_OPTIONS = {
    "kafka.bootstrap.servers": f"{EH_NAMESPACE}.servicebus.windows.net:9093",
    "subscribe": EH_NAME,
    # NOTE(review): Spark's Kafka source documents "kafka.group.id" for choosing
    # the consumer group; confirm whether "consumer.group.id" is honoured here.
    "consumer.group.id": state,
    "kafka.security.protocol": "SASL_SSL",
    "failOnDataLoss": "false",
    "startingOffsets": "latest",
    "kafka.sasl.mechanism": "PLAIN",
    # SASL PLAIN login: the literal user name "$ConnectionString" with the
    # connection string from the secret scope as the password.
    "kafka.sasl.jaas.config": f'kafkashaded.org.apache.kafka.common.security.plain.PlainLoginModule required username="$ConnectionString" password="{connection_string}";'
}

In [0]:
# Storage locations for this state: the Delta output and the streaming checkpoint
data_path = f"abfss://silver@ingest{lz_key}curated{env}.dfs.core.windows.net/ARIADM/ACTIVE/CCD/APPEALS/{state}/publish_audit_db_eh/"
checkpoint_path = f"abfss://db-ack-checkpoint@ingest{lz_key}xcutting{env}.dfs.core.windows.net/{state}/ACK/"

print(f"📂 Data path: {data_path}")
print(f"📂 Checkpoint path: {checkpoint_path}")

# Keep schema exactly as it exists in Pub notebook.
# Every field is a nullable string, so the schema is generated from the
# ordered list of field names.
schema = StructType([
    StructField(field_name, StringType(), True)
    for field_name in (
        "RunID",
        "CaseNo",
        "Filename",
        "State",
        "PublishingDateTime",
        "Status",
        "Error",
    )
])

# --- Read data from Kafka ---
# Build a streaming DataFrame on the "kafka" source using KAFKA_OPTIONS.
try:
    _kafka_reader = spark.readStream.format("kafka").options(**KAFKA_OPTIONS)
    eventhubdf = _kafka_reader.load()
    print("✅ Successfully connected to Kafka topic")
except Exception as e:
    raise RuntimeError(f"Failed to read from Kafka topic: {e}")

# --- Parse Kafka messages ---
# Three projection steps: message value -> string, string -> struct (via
# `schema`), struct -> one top-level column per field.
try:
    _json_str_df = eventhubdf.select(col("value").cast("string").alias("json_str"))
    _json_obj_df = _json_str_df.select(from_json(col("json_str"), schema).alias("json_obj"))
    parsed_df = _json_obj_df.select("json_obj.*")
    print("✅ Successfully parsed Kafka messages")
except Exception as e:
    raise RuntimeError(f"Failed to parse Kafka messages: {e}")

# --- Write stream to Delta with checkpointing ---
# How long the streaming query is allowed to run before it is stopped.
ACK_STREAM_TIMEOUT_SECONDS = 15

try:
    query = (
        parsed_df.writeStream
        .format("delta")
        .option("checkpointLocation", checkpoint_path)
        .outputMode("append")
        .start(data_path)
    )
    try:
        # Wait briefly for the stream to process data. With a timeout,
        # awaitTermination returns even though the query is still running.
        query.awaitTermination(timeout=ACK_STREAM_TIMEOUT_SECONDS)
    finally:
        # Always stop the query, including when the wait raises or the cell is
        # interrupted, so no streaming query is left running in the background.
        query.stop()
    print("✅ Successfully written stream to Delta")
except Exception as e:
    raise RuntimeError(f"Failed to write stream to Delta: {e}") from e

# --- Read resulting Delta table ---
# Batch-read what the stream wrote, keep only rows whose Status is populated,
# then show the rows, a per-State row count and the overall total.
try:
    _ack_table = spark.read.format("delta").load(data_path)
    df = _ack_table.filter(col("Status").isNotNull())
    display(df)
    _rows_per_state = df.groupBy("State").count()
    display(_rows_per_state)
    print(f"📊 Total rows loaded: {df.count()}")
except Exception as e:
    raise RuntimeError(f"Failed to read Delta table from {data_path}: {e}")

In [0]:
{
    "id": 1757950664125654,
    "jurisdiction": "IA",
    "state": "migrated",
    "version": 0,
    "case_type_id": "Asylum",
    "created_date": "2025-09-15T15:37:44.106121605",
    "last_modified": "2025-09-15T15:37:44.794452375",
    "last_state_modified_date": "2025-09-15T15:37:44.106121605",
    "security_classification": "PUBLIC",
    "case_data": {
        "appellantInUk": "Yes",
        "staffLocation": "Taylor House",
        "currentCaseStateVisibleToHomeOfficeAll": "migrated",
        "feeWithHearing": "140",
        "uploadAddendumEvidenceLegalRepActionAvailable": "No",
        "paidDate": "2024-08-05",
        "hearingCentre": "taylorHouse",
        "applicationChangeDesignatedHearingCentre": "taylorHouse",
        "isIntegrated": "No",
        "isEjp": "No",
        "isNabaAdaEnabled": "No",
        "submissionOutOfTime": "No",
        "uploadAddendumEvidenceHomeOfficeActionAvailable": "No",
        "legalRepFamilyName": "",
        "isAdmin": "Yes",
        "s94bStatus": "No",
        "currentCaseStateVisibleToHomeOfficeGeneric": "migrated",
        "remissionType": "hoWaiverRemission",
        "hasServiceRequestAlready": "No",
        "feeVersion": "2",
        "legalRepOrganisationPartyId": "71c50709-b802-42c7-ac56-2ef03e6e14e7",
        "hmctsCaseNameInternal": "GivenName Migration 3 FamilyName appealSubmitted",
        "appellantPartyId": "45889c92-2cf4-4dae-ae9a-f64aa051d525",
        "paidAmount": "14000",
        "appellantHasFixedAddress": "Yes",
        "uploadAddendumEvidenceAdminOfficerActionAvailable": "No",
        "hearingTypeResult": "No",
        "caseManagementLocation": {
            "region": "1",
            "baseLocation": "765324"
        },
        "appealSubmissionDate": "2025-09-15",
        "ariaMigrationTaskDueDays": "2",
        "adminDeclaration1": [
            "hasDeclared"
        ],
        "appealOutOfCountry": "No",
        "letterSentOrReceived": "Sent",
        "caseManagementCategory": {
            "value": {
                "code": "refusalOfHumanRights",
                "label": "Refusal of a human rights claim"
            },
            "list_items": [
                {
                    "code": "refusalOfHumanRights",
                    "label": "Refusal of a human rights claim"
                }
            ]
        },
        "feeDescription": "Appeal determined with a hearing",
        "appealReferenceNumber": "HU/50001/2025",
        "feeCode": "FEE0238",
        "ariaDesiredState": "appealSubmitted",
        "appellantNationalities": [
            {
                "value": {
                    "code": "GB"
                },
                "id": "520cd556-39b3-4729-9093-a07513f4b03e"
            }
        ],
        "markEvidenceAsReviewedActionAvailable": "No",
        "automaticEndAppealTimedEventId": "fd614594-6b6b-4116-8568-f0d80298486e",
        "changeDirectionDueDateActionAvailable": "No",
        "email": "example@test.com",
        "currentCaseStateVisibleToJudge": "migrated",
        "feeAmountGbp": "14000",
        "decisionHearingFeeOption": "decisionWithHearing",
        "SearchCriteria": {
            "OtherCaseReferences": [
                {
                    "value": "HU/50001/2025",
                    "id": "0b55fdef-04f5-428b-9cef-a61ade937c15"
                }
            ],
            "SearchParties": [
                {
                    "value": {
                        "Name": "GivenName Migration 3 FamilyName appealSubmitted",
                        "EmailAddress": "example@test.com",
                        "AddressLine1": "Flat 101",
                        "PostCode": "SE10 0XX",
                        "DateOfBirth": "2000-01-01"
                    },
                    "id": "53f9be65-7de5-4856-b287-a006509b60ab"
                }
            ]
        },
        "paymentDescription": "Appeal determined with a hearing",
        "currentCaseStateVisibleToHomeOfficeApc": "migrated",
        "internalAppellantEmail": "example@test.com",
        "isAriaMigratedFilter": "Yes",
        "isDlrmFeeRemissionEnabled": "Yes",
        "tribunalDocuments": [],
        "isOutOfCountryEnabled": "Yes",
        "appellantAddress": {
            "County": "",
            "Country": "United Kingdom",
            "PostCode": "SE10 0XX",
            "PostTown": "London",
            "AddressLine1": "Flat 101",
            "AddressLine2": "10 Cutter Lane",
            "AddressLine3": ""
        },
        "isFeePaymentEnabled": "Yes",
        "uploadTheNoticeOfDecisionDocs": [],
        "haveHearingAttendeesAndDurationBeenRecorded": "No",
        "uploadAddendumEvidenceActionAvailable": "No",
        "appellantGivenNames": "GivenName Migration 3",
        "appellantFamilyName": "FamilyName appealSubmitted",
        "isServiceRequestTabVisibleConsideringRemissions": "Yes",
        "markAddendumEvidenceAsReviewedActionAvailable": "No",
        "appellantStateless": "hasNationality",
        "currentCaseStateVisibleToHomeOfficePou": "migrated",
        "currentCaseStateVisibleToAdminOfficer": "migrated",
        "hmctsCaseCategory": "Human rights",
        "tribunalReceivedDate": "2024-08-05",
        "sendDirectionActionAvailable": "No",
        "appellantsRepresentation": "Yes",
        "deportationOrderOptions": "No",
        "homeOfficeReferenceNumber": "012345678",
        "isAriaMigrated": "Yes",
        "currentCaseStateVisibleToLegalRepresentative": "migrated",
        "appealGroundsForDisplay": [],
        "ccdReferenceNumberForDisplay": "1723 0197 9804 1350",
        "isAppellantMinor": "No",
        "legalRepresentativeDocuments": [],
        "hasOtherAppeals": "No",
        "currentCaseStateVisibleToHomeOfficeLart": "migrated",
        "uploadTheAppealFormDocs": [],
        "appealSubmissionInternalDate": "2024-08-07",
        "appealType": "refusalOfHumanRights",
        "isRemissionsEnabled": "Yes",
        "additionalPaymentInfo": "Additional paid information",
        "uploadAdditionalEvidenceActionAvailable": "No",
        "legalRepIndividualPartyId": "f7159136-7bff-40fb-921a-c8a53633afc8",
        "localAuthorityPolicy": {
            "Organisation": {},
            "OrgPolicyCaseAssignedRole": "[LEGALREPRESENTATIVE]"
        },
        "homeOfficeDecisionDate": "2024-08-05",
        "ariaDesiredStateSelectedValue": "Appeal submitted",
        "paymentStatus": "Paid",
        "feePaymentAppealType": "Yes",
        "uploadAdditionalEvidenceHomeOfficeActionAvailable": "No",
        "isNabaEnabledOoc": "No",
        "appellantDateOfBirth": "2000-01-01",
        "caseNameHmctsInternal": "GivenName Migration 3 FamilyName appealSubmitted",
        "currentCaseStateVisibleToCaseOfficer": "migrated",
        "appellantNameForDisplay": "GivenName Migration 3 FamilyName appealSubmitted",
        "notificationsSent": [],
        "searchPostcode": "SE10 0XX",
        "paymentDate": "2024-08-05",
        "isNabaEnabled": "No"
    },
    "data_classification": {
        "appellantInUk": "PUBLIC",
        "staffLocation": "PUBLIC",
        "currentCaseStateVisibleToHomeOfficeAll": "PUBLIC",
        "feeWithHearing": "PUBLIC",
        "uploadAddendumEvidenceLegalRepActionAvailable": "PUBLIC",
        "paidDate": "PUBLIC",
        "hearingCentre": "PUBLIC",
        "applicationChangeDesignatedHearingCentre": "PUBLIC",
        "isIntegrated": "PUBLIC",
        "isEjp": "PUBLIC",
        "isNabaAdaEnabled": "PUBLIC",
        "submissionOutOfTime": "PUBLIC",
        "uploadAddendumEvidenceHomeOfficeActionAvailable": "PUBLIC",
        "legalRepFamilyName": "PUBLIC",
        "isAdmin": "PUBLIC",
        "s94bStatus": "PUBLIC",
        "currentCaseStateVisibleToHomeOfficeGeneric": "PUBLIC",
        "remissionType": "PUBLIC",
        "hasServiceRequestAlready": "PUBLIC",
        "feeVersion": "PUBLIC",
        "legalRepOrganisationPartyId": "PUBLIC",
        "hmctsCaseNameInternal": "PUBLIC",
        "appellantPartyId": "PUBLIC",
        "paidAmount": "PUBLIC",
        "appellantHasFixedAddress": "PUBLIC",
        "uploadAddendumEvidenceAdminOfficerActionAvailable": "PUBLIC",
        "hearingTypeResult": "PUBLIC",
        "caseManagementLocation": {
            "classification": "PUBLIC",
            "value": {
                "region": "PUBLIC",
                "baseLocation": "PUBLIC"
            }
        },
        "appealSubmissionDate": "PUBLIC",
        "ariaMigrationTaskDueDays": "PUBLIC",
        "adminDeclaration1": "PUBLIC",
        "appealOutOfCountry": "PUBLIC",
        "letterSentOrReceived": "PUBLIC",
        "caseManagementCategory": "PUBLIC",
        "feeDescription": "PUBLIC",
        "appealReferenceNumber": "PUBLIC",
        "feeCode": "PUBLIC",
        "ariaDesiredState": "PUBLIC",
        "appellantNationalities": {
            "classification": "PUBLIC",
            "value": [
                {
                    "value": {
                        "code": "PUBLIC"
                    },
                    "id": "520cd556-39b3-4729-9093-a07513f4b03e"
                }
            ]
        },
        "markEvidenceAsReviewedActionAvailable": "PUBLIC",
        "automaticEndAppealTimedEventId": "PUBLIC",
        "changeDirectionDueDateActionAvailable": "PUBLIC",
        "email": "PUBLIC",
        "currentCaseStateVisibleToJudge": "PUBLIC",
        "feeAmountGbp": "PUBLIC",
        "decisionHearingFeeOption": "PUBLIC",
        "SearchCriteria": {
            "classification": "PUBLIC",
            "value": {
                "OtherCaseReferences": {
                    "classification": "PUBLIC",
                    "value": [
                        {
                            "id": "0b55fdef-04f5-428b-9cef-a61ade937c15",
                            "classification": "PUBLIC"
                        }
                    ]
                },
                "SearchParties": {
                    "classification": "PUBLIC",
                    "value": [
                        {
                            "value": {
                                "Name": "PUBLIC",
                                "EmailAddress": "PUBLIC",
                                "AddressLine1": "PUBLIC",
                                "PostCode": "PUBLIC",
                                "DateOfBirth": "PUBLIC"
                            },
                            "id": "53f9be65-7de5-4856-b287-a006509b60ab"
                        }
                    ]
                }
            }
        },
        "paymentDescription": "PUBLIC",
        "currentCaseStateVisibleToHomeOfficeApc": "PUBLIC",
        "internalAppellantEmail": "PUBLIC",
        "isAriaMigratedFilter": "PUBLIC",
        "isDlrmFeeRemissionEnabled": "PUBLIC",
        "tribunalDocuments": {
            "classification": "PUBLIC",
            "value": []
        },
        "isOutOfCountryEnabled": "PUBLIC",
        "appellantAddress": {
            "classification": "PUBLIC",
            "value": {
                "County": "PUBLIC",
                "Country": "PUBLIC",
                "PostCode": "PUBLIC",
                "PostTown": "PUBLIC",
                "AddressLine1": "PUBLIC",
                "AddressLine2": "PUBLIC",
                "AddressLine3": "PUBLIC"
            }
        },
        "isFeePaymentEnabled": "PUBLIC",
        "uploadTheNoticeOfDecisionDocs": {
            "classification": "PUBLIC",
            "value": []
        },
        "haveHearingAttendeesAndDurationBeenRecorded": "PUBLIC",
        "uploadAddendumEvidenceActionAvailable": "PUBLIC",
        "appellantGivenNames": "PUBLIC",
        "appellantFamilyName": "PUBLIC",
        "isServiceRequestTabVisibleConsideringRemissions": "PUBLIC",
        "markAddendumEvidenceAsReviewedActionAvailable": "PUBLIC",
        "appellantStateless": "PUBLIC",
        "currentCaseStateVisibleToHomeOfficePou": "PUBLIC",
        "currentCaseStateVisibleToAdminOfficer": "PUBLIC",
        "hmctsCaseCategory": "PUBLIC",
        "tribunalReceivedDate": "PUBLIC",
        "sendDirectionActionAvailable": "PUBLIC",
        "appellantsRepresentation": "PUBLIC",
        "deportationOrderOptions": "PUBLIC",
        "homeOfficeReferenceNumber": "PUBLIC",
        "isAriaMigrated": "PUBLIC",
        "currentCaseStateVisibleToLegalRepresentative": "PUBLIC",
        "appealGroundsForDisplay": "PUBLIC",
        "ccdReferenceNumberForDisplay": "PUBLIC",
        "isAppellantMinor": "PUBLIC",
        "legalRepresentativeDocuments": {
            "classification": "PUBLIC",
            "value": []
        },
        "hasOtherAppeals": "PUBLIC",
        "currentCaseStateVisibleToHomeOfficeLart": "PUBLIC",
        "uploadTheAppealFormDocs": {
            "classification": "PUBLIC",
            "value": []
        },
        "appealSubmissionInternalDate": "PUBLIC",
        "appealType": "PUBLIC",
        "isRemissionsEnabled": "PUBLIC",
        "additionalPaymentInfo": "PUBLIC",
        "uploadAdditionalEvidenceActionAvailable": "PUBLIC",
        "legalRepIndividualPartyId": "PUBLIC",
        "homeOfficeDecisionDate": "PUBLIC",
        "ariaDesiredStateSelectedValue": "PUBLIC",
        "paymentStatus": "PUBLIC",
        "feePaymentAppealType": "PUBLIC",
        "uploadAdditionalEvidenceHomeOfficeActionAvailable": "PUBLIC",
        "isNabaEnabledOoc": "PUBLIC",
        "appellantDateOfBirth": "PUBLIC",
        "caseNameHmctsInternal": "PUBLIC",
        "currentCaseStateVisibleToCaseOfficer": "PUBLIC",
        "appellantNameForDisplay": "PUBLIC",
        "notificationsSent": {
            "classification": "PUBLIC",
            "value": []
        },
        "searchPostcode": "PUBLIC",
        "paymentDate": "PUBLIC",
        "isNabaEnabled": "PUBLIC"
    },
    "supplementary_data": null,
    "after_submit_callback_response": {
        "confirmation_header": null,
        "confirmation_body": null
    },
    "callback_response_status_code": 200,
    "callback_response_status": "CALLBACK_COMPLETED",
    "delete_draft_response_status_code": null,
    "delete_draft_response_status": null
}