In [0]:
import logging

In [0]:
# Notebook-wide logger. Records are formatted as
# "<timestamp> - <logger name> - <level> - <message>".
formatter = logging.Formatter(
    fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
handler = logging.StreamHandler()
handler.setFormatter(formatter)

logger = logging.getLogger("DatabricksWorkflow")
logger.setLevel(logging.INFO)

# Attach the stream handler only when no handler is already reachable, so
# re-running this cell does not stack duplicate handlers.
# NOTE(review): Logger.hasHandlers() also counts handlers on ancestor loggers
# (including root), so the handler above is skipped whenever the root logger
# is already configured — confirm that is the intended behaviour on the cluster.
if not logger.hasHandlers():
    logger.addHandler(handler)

In [0]:
# Location of the workflow configuration JSON that this cell reads.
config_path = "dbfs:/configs/config.json"

# --- Load the config file ---
# Any failure while reading is reported as FileNotFoundError; `from e` keeps the
# original Spark exception attached as the explicit cause.
try:
    config = spark.read.option("multiline", "true").json(config_path)
    logger.info(f"Successfully read config file from {config_path}")
except Exception as e:
    logger.error(f"Could not read config file at {config_path}: {e}", exc_info=True)
    raise FileNotFoundError(f"Could not read config file at {config_path}: {e}") from e

# --- Extract and normalise the required settings ---
# Only the first row of the config DataFrame is used. Both values are trimmed
# and lower-cased before they are interpolated into resource names below.
try:
    first_row = config.first()
    if first_row is None:
        # DataFrame.first() returns None for an empty DataFrame; say so
        # explicitly instead of failing with "'NoneType' is not subscriptable".
        raise ValueError("config file contains no rows")
    env = first_row["env"].strip().lower()
    lz_key = first_row["lz_key"].strip().lower()
    logger.info(f"Extracted configs: env={env}, lz_key={lz_key}")
except Exception as e:
    logger.error(f"Missing expected keys 'env' or 'lz_key' in config file: {e}", exc_info=True)
    raise KeyError(f"Missing expected keys 'env' or 'lz_key' in config file: {e}") from e

# --- Build the Key Vault (secret scope) name ---
# Plain string interpolation cannot fail, so the only meaningful check is that
# both parts are non-empty; an empty part would produce a malformed name.
if not env or not lz_key:
    keyvault_error = (
        "Error constructing keyvault name: 'env' and 'lz_key' must be non-empty "
        f"(env='{env}', lz_key='{lz_key}')"
    )
    logger.error(keyvault_error)
    raise ValueError(keyvault_error)

keyvault_name = f"ingest{lz_key}-meta002-{env}"
logger.info(f"Constructed keyvault name: {keyvault_name}")


In [0]:
def _get_service_principal_secret(secret_key):
    """Return the secret stored under `secret_key` in the `keyvault_name` scope.

    Logs success or failure. Any exception raised by the lookup is logged with
    its traceback and re-raised as KeyError carrying the same message.
    """
    try:
        secret_value = dbutils.secrets.get(scope=keyvault_name, key=secret_key)
        logger.info(f"Successfully retrieved {secret_key} from Key Vault")
        return secret_value
    except Exception as e:
        logger.error(f"Could not retrieve '{secret_key}' from Key Vault '{keyvault_name}': {e}", exc_info=True)
        raise KeyError(f"Could not retrieve '{secret_key}' from Key Vault '{keyvault_name}': {e}")


# Service-principal credentials, fetched in this order: secret, tenant, client id.
# The key names (including the 'PRINCIPLE' spelling) are passed through verbatim.
client_secret = _get_service_principal_secret('SERVICE-PRINCIPLE-CLIENT-SECRET')
tenant_id = _get_service_principal_secret('SERVICE-PRINCIPLE-TENANT-ID')
client_id = _get_service_principal_secret('SERVICE-PRINCIPLE-CLIENT-ID')

logger.info("✅ Successfully retrieved all Service Principal secrets from Key Vault")


In [0]:
# --- Parameterise containers ---
# Storage account names are derived from the landing-zone key and environment
# that were read from the config file.
curated_storage_account = f"ingest{lz_key}curated{env}"
curated_container = "gold"
silver_curated_container = "silver"
checkpoint_storage_account = f"ingest{lz_key}xcutting{env}"

# --- Assign OAuth to storage accounts ---
# Each account gets the same service-principal (client-credentials) settings,
# keyed by its own <account>.dfs.core.windows.net host name.
storage_accounts = [curated_storage_account, checkpoint_storage_account]

for storage_account in storage_accounts:
    configs = {
        f"fs.azure.account.auth.type.{storage_account}.dfs.core.windows.net": "OAuth",
        f"fs.azure.account.oauth.provider.type.{storage_account}.dfs.core.windows.net":
            "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
        f"fs.azure.account.oauth2.client.id.{storage_account}.dfs.core.windows.net": client_id,
        f"fs.azure.account.oauth2.client.secret.{storage_account}.dfs.core.windows.net": client_secret,
        f"fs.azure.account.oauth2.client.endpoint.{storage_account}.dfs.core.windows.net":
            f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
    }

    for key, val in configs.items():
        try:
            spark.conf.set(key, val)
        except Exception as e:
            # Log and wrap exactly once. Only the config key is reported, never
            # the value, because some values are secrets.
            config_error = f"Failed to set Spark config '{key}' for storage account '{storage_account}': {e}"
            logger.error(config_error, exc_info=True)
            raise RuntimeError(config_error) from e

    logger.info(f"✅ Successfully configured OAuth for storage account: {storage_account}")


In [0]:
# Delta location that is exposed below as the temp view 'ccdCallResult'.
ccdCallResult_path = f"abfss://silver@ingest{lz_key}curated{env}.dfs.core.windows.net/ARIADM/ACTIVE/CCD/AUDIT/APPEALS/all_active_states/ack_audit"

try:
    # List the directory first so the log shows what is present at the path.
    files = dbutils.fs.ls(ccdCallResult_path)
    if files:
        logger.info(f"Found {len(files)} files in path: {[f.name for f in files]}")
    else:
        logger.warning(f"No files found at path: {ccdCallResult_path}")

    # Read Delta and create temp view
    ccdCallResult = spark.read.format("delta").load(ccdCallResult_path)
    ccdCallResult.createOrReplaceTempView("ccdCallResult")
    # count() runs a Spark job; it is used here only to report the row count.
    logger.info(f"Temp view 'ccdCallResult' created with {ccdCallResult.count()} rows")

except Exception as e:
    logger.error(f"Error reading Delta table or creating temp view: {e}", exc_info=True)
    # Fail here instead of continuing: the later %sql cell joins against this
    # view and would otherwise fail with a less helpful "view not found" error.
    raise RuntimeError(
        f"Could not load Delta table at '{ccdCallResult_path}' into temp view 'ccdCallResult': {e}"
    ) from e

In [0]:
# %sql
# SELECT * FROM ccdCallResult LIMIT 5

In [0]:
# Delta location that is exposed below as the temp view 'publishPayloadResult'.
payload_table_path = f"abfss://silver@ingest{lz_key}curated{env}.dfs.core.windows.net/ARIADM/ACTIVE/CCD/APPEALS/all_states_combined/publish_audit_db_eh"

try:
    # List the directory first so the log shows what is present at the path.
    files = dbutils.fs.ls(payload_table_path)
    if files:
        logger.info(f"Found {len(files)} files in path: {[f.name for f in files]}")
    else:
        logger.warning(f"No files found at path: {payload_table_path}")

    # Read Delta and create temp view
    publishPayloadResult = spark.read.format("delta").load(payload_table_path)
    publishPayloadResult.createOrReplaceTempView("publishPayloadResult")
    # count() runs a Spark job; it is used here only to report the row count.
    logger.info(f"Temp view 'publishPayloadResult' created with {publishPayloadResult.count()} rows")

except Exception as e:
    logger.error(f"Error reading Delta table or creating temp view: {e}", exc_info=True)
    # Fail here instead of continuing: the later %sql cell joins against this
    # view and would otherwise fail with a less helpful "view not found" error.
    raise RuntimeError(
        f"Could not load Delta table at '{payload_table_path}' into temp view 'publishPayloadResult': {e}"
    ) from e

In [0]:
# %sql
# SELECT * FROM publishPayloadResult LIMIT 5

In [0]:
%sql
-- Side-by-side view of the publish-payload audit (t1) and the CCD call audit (t2),
-- matched on CaseNo and State. FULL OUTER JOIN keeps rows that exist on only one side.
--
-- State and CaseNo are coalesced across both sides so that rows present only in
-- ccdCallResult still show their identifiers instead of NULL.
-- The two runID columns are aliased so the result has no duplicate column names.
SELECT t1.runID as `CCD Publish Payload Run ID`,
       t2.runID as `CCD Call Run ID`,
       COALESCE(t1.State, t2.State) as State,
       COALESCE(t1.CaseNo, t2.CaseNo) as CaseNo,
       t1.Status as `CCD Publish Payload Status`,
       t2.Status as `CCD Call Status`,
       t1.PublishingDateTime as `CCD Publish Payload Publishing Date Time`,
       t1.Error as `CCD Payload Publisher Error`,
       t2.StartDateTime as `CCD Call Function App Start Date Time`,
       t2.EndDateTime as `CCD Call Function App End Date Time`,
       t2.CCDCaseId as `CCD Case ID`,
       t2.Error as `CCD Call Function App Error`

-- NOTE(review): the RunID predicate below is commented out, so rows from different
-- runs of the same case/state are paired with each other — confirm this is intended.
FROM publishPayloadResult t1
FULL OUTER JOIN ccdCallResult t2 ON t1.CaseNo = t2.CaseNo AND t1.State = t2.State --  AND t1.RunID = t2.RunID