In [0]:
# Set the "pipelines.tableManagedByMultiplePipelinesCheck.enabled" setting to "false".
# NOTE(review): presumably this allows the same table to be written by more than
# one pipeline without the ownership check failing — confirm against the DLT docs.
spark.conf.set("pipelines.tableManagedByMultiplePipelinesCheck.enabled", "false")

In [0]:
import dlt
import json
from pyspark.sql.functions import when, col,coalesce, current_timestamp, lit, date_format
from pyspark.sql.functions import *
from pyspark.sql import functions as F
from pyspark.sql.types import *
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from pyspark.sql.window import Window

In [0]:
# Load the environment settings from the JSON config file on DBFS.
config = spark.read.option("multiline", "true").json("dbfs:/configs/config.json")

# `first()` is a Spark action, so fetch the row once and read both fields from it
# instead of triggering a separate job per field.
_config_row = config.first()
env_name = _config_row["env"].strip().lower()
lz_key = _config_row["lz_key"].strip().lower()

print(f"env_code: {lz_key}")  # This won't be redacted
print(f"env_name: {env_name}")  # This won't be redacted

# Name passed as the secret scope to dbutils.secrets.get in the next cell.
KeyVault_name = f"ingest{lz_key}-meta002-{env_name}"
print(f"KeyVault_name: {KeyVault_name}")

In [0]:
# Service principal credentials, read from the secret scope named by KeyVault_name.
# The key names (including the "PRINCIPLE" spelling) are used exactly as written;
# presumably that is how the secrets are stored, so do not "correct" them without checking.
client_id = dbutils.secrets.get(KeyVault_name, "SERVICE-PRINCIPLE-CLIENT-ID")
client_secret = dbutils.secrets.get(KeyVault_name, "SERVICE-PRINCIPLE-CLIENT-SECRET")
tenant_id = dbutils.secrets.get(KeyVault_name, "SERVICE-PRINCIPLE-TENANT-ID")

# Storage account names
curated_storage = f"ingest{lz_key}curated{env_name}"
checkpoint_storage = f"ingest{lz_key}xcutting{env_name}"
raw_storage = f"ingest{lz_key}raw{env_name}"
landing_storage = f"ingest{lz_key}landing{env_name}"
external_storage = f"ingest{lz_key}external{env_name}"

# Every storage account gets the same five OAuth (client credentials) settings,
# so they are applied in one loop rather than one copy-pasted section per account.
_oauth_token_endpoint = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"

for _storage_account in (
    curated_storage,     # Delta tables (bronze / silver / gold containers)
    checkpoint_storage,  # checkpoints
    raw_storage,         # raw zone
    landing_storage,     # landing zone
    external_storage,    # external files
):
    _dfs_host = f"{_storage_account}.dfs.core.windows.net"
    spark.conf.set(f"fs.azure.account.auth.type.{_dfs_host}", "OAuth")
    spark.conf.set(
        f"fs.azure.account.oauth.provider.type.{_dfs_host}",
        "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    )
    spark.conf.set(f"fs.azure.account.oauth2.client.id.{_dfs_host}", client_id)
    spark.conf.set(f"fs.azure.account.oauth2.client.secret.{_dfs_host}", client_secret)
    spark.conf.set(f"fs.azure.account.oauth2.client.endpoint.{_dfs_host}", _oauth_token_endpoint)

In [0]:
# read_hive = False

# Setting variables for use in subsequent cells
raw_path = f"abfss://raw@ingest{lz_key}raw{env_name}.dfs.core.windows.net/ARIADM/ACTIVE/CCD/APPEALS"
landing_path = f"abfss://landing@ingest{lz_key}landing{env_name}.dfs.core.windows.net/SQLServer/Sales/IRIS/dbo/"
bronze_path = f"abfss://bronze@ingest{lz_key}curated{env_name}.dfs.core.windows.net/ARIADM/ACTIVE/CCD/APPEALS"
silver_path = f"abfss://silver@ingest{lz_key}curated{env_name}.dfs.core.windows.net/ARIADM/ACTIVE/CCD/APPEALS"
gold_path = f"abfss://gold@ingest{lz_key}curated{env_name}.dfs.core.windows.net/ARIADM/ACTIVE/CCD/APPEALS"
# ABFS URIs take the form abfss://<container>@<account>.dfs.core.windows.net/<path>.
# This one previously started with "abfss:///", which leaves the authority empty.
external_path = f"abfss://external-csv@ingest{lz_key}external{env_name}.dfs.core.windows.net/"
gold_outputs = "ARIADM/CCD/APPEALS"
hive_schema = "ariadm_ccd_apl"
# key_vault = "ingest00-keyvault-sbox"

html_path = f"abfss://html-template@ingest{lz_key}landing{env_name}.dfs.core.windows.net/"

# Print all variables
variables = {
    # "read_hive": read_hive,
    "raw_path": raw_path,
    "landing_path": landing_path,
    "bronze_path": bronze_path,
    "silver_path": silver_path,
    "gold_path": gold_path,
    "external_path": external_path,
    "html_path": html_path,
    "gold_outputs": gold_outputs,
    "hive_schema": hive_schema,
    "key_vault": KeyVault_name
}

display(variables)

In [0]:
# spark.read.table('ariadm_active_appeals_bronze.bronze_appealcase_crep_rep_floc_cspon_cfs').display()
# spark.read.table('ariadm_active_appeals_bronze.bronze_appealcase_crep_rep_floc_cspon_cfs').filter(col("CaseNo").isin("AA/00026/2005","EA/00026/2005/COPY")).display()
# spark.read.table('ariadm_active_appeals_bronze.bronze_appealcase_caseappellant_appellant').filter(col("CaseNo").isin("AA/00026/2005","EA/00026/2005/COPY")).display()

In [0]:
# spark.read.table('hive_metastore.ariadm_active_appeals_bronze.bronze_status_htype_clist_list_ltype_court_lsitting_adj').display()
# spark.read.table('hive_metastore.ariadm_active_appeals_bronze.bronze_appealcase_transaction_transactiontype').display()
# spark.read.table('hive_metastore.ariadm_active_appeals_bronze.bronze_appealcase_link_linkdetail').display()
# spark.read.table('hive_metastore.ariadm_active_appeals_bronze.bronze_caseadjudicator_adjudicator').display()
# spark.read.table('hive_metastore.ariadm_active_appeals_bronze.bronze_appealcategory').display()
# spark.read.table('hive_metastore.ariadm_active_appeals_bronze.bronze_documentsreceived').display()
# spark.read.table('hive_metastore.ariadm_active_appeals_bronze.bronze_history').display()
# spark.read.table('hive_metastore.ariadm_active_appeals_bronze.bronze_history').printSchema()

In [0]:
def _case_copy_settings(new_case_no, case_prefix):
    """Build one copy-case settings entry.

    Args:
        new_case_no: Value stored under ``caseMapping["newCaseNo"]``.
        case_prefix: Override value for the "CasePrefix" column.

    Returns:
        A dict with a "caseMapping" section (key column, existing case number,
        new case number) and a "tables" section mapping each fully qualified
        table name to its column overrides. A fresh dict is built on every call.
    """
    bronze = "hive_metastore.ariadm_active_appeals_bronze"
    return {
        "caseMapping": {
            "keyColumn": "CaseNo",
            "existingCaseNo": "AA/00026/2005",
            "newCaseNo": new_case_no,
        },
        "tables": {
            f"{bronze}.bronze_appealcase_crep_rep_floc_cspon_cfs": {
                "CasePrefix": case_prefix,
                "CentreId": 444,
            },
            f"{bronze}.bronze_appealcase_caseappellant_appellant": {
                "Appellant_Email": "testytest123@gmail.com",
                "FCONumber": "555",
            },
            f"{bronze}.bronze_status_htype_clist_list_ltype_court_lsitting_adj": {
                "CaseStatus": "100",
                "Outcome": 100,
            },
            f"{bronze}.bronze_appealcase_transaction_transactiontype": {
                "TransactionId": 454545,
                "Status": 454545,
            },
            f"{bronze}.bronze_appealcase_link_linkdetail": {
                "LinkNo": 55555,
                "ReasonLinkId": 55555,
            },
            f"{bronze}.bronze_caseadjudicator_adjudicator": {
                "Judge_Surname": "Simpson",
                "Judge_Forenames": "Homer",
            },
            f"{bronze}.bronze_appealcategory": {"CategoryId": 50},
            f"{bronze}.bronze_documentsreceived": {"ReceivedDocumentId": 50},
            f"{bronze}.bronze_history": {"HistType": 100},
        },
    }


# One entry per copy to create; the entries differ only in the new case number
# and the CasePrefix override.
config = [
    _case_copy_settings("AA/00026/2005/COPY", "ZZ"),
    _case_copy_settings("AA/00026/2005/COPY2", "ZX"),
]
 
    # ##CaseNo settings 3
    # {
    # "caseMapping": {
    #     "keyColumn": "CaseNo",
    #     "existingCaseNo": "AA/00026/2005",
    #     "newCaseNo": "EA/00026/2005/COPY"
    # },
    # "tables": {
    #     "hive_metastore.ariadm_active_appeals_bronze.bronze_appealcase_crep_rep_floc_cspon_cfs": {
    #         "CasePrefix" : "AB",
    #         "CentreId" : 444
    #     },
    #     "hive_metastore.ariadm_active_appeals_bronze.bronze_appealcase_caseappellant_appellant": {
    #         "Appellant_Email" : "testytest123@gmail.com",
    #         "FCONumber" : 555
    #     },
    #     "hive_metastore.ariadm_active_appeals_bronze.bronze_status_htype_clist_list_ltype_court_lsitting_adj": {
    #         "CaseStatus" : 100,
    #         "Outcome" : 100
    #     },
    #     "hive_metastore.ariadm_active_appeals_bronze.bronze_appealcase_transaction_transactiontype": {
    #         "TransactionId" : 454545,
    #         "Status" : 454545
    #     },
    #     "hive_metastore.ariadm_active_appeals_bronze.bronze_appealcase_link_linkdetail": {
    #         "LinkNo" : 55555,
    #         "ReasonLinkId" : 55555
    #     },
    #     "hive_metastore.ariadm_active_appeals_bronze.bronze_caseadjudicator_adjudicator": {
    #         "Judge_Surname" : "Simpson",
    #         "Judge_Forenames" : "Homer"
    #     },
    #     "hive_metastore.ariadm_active_appeals_bronze.bronze_appealcategory": {
    #         "CategoryId" : 50
    #     },
    #     "hive_metastore.ariadm_active_appeals_bronze.bronze_documentsreceived": {
    #         "ReceivedDocumentId" : 50
    #     },
    #     "hive_metastore.ariadm_active_appeals_bronze.bronze_history": {
    #         "HistType" : 100
    #     }}
    # }]
 
# Print the configuration list so the settings can be checked in the cell output.
print(config)

In [0]:
def create_data(config_list):
    """Copy an existing case's rows under a new case number in each configured table.

    For every entry in ``config_list`` and every table listed under its
    ``"tables"`` key, the rows whose key column equals ``existingCaseNo`` are
    read, the configured column overrides are applied, the key column is set
    to ``newCaseNo`` and the result is appended to the same table.

    Tables that cannot be read or that contain no matching rows are reported
    and skipped. A failed append is reported and processing continues with the
    next table.

    Note: rows are written in append mode, so calling this again with the same
    configuration adds the copied rows a second time.

    Args:
        config_list: Iterable of dicts, each with a ``"caseMapping"`` dict
            (``keyColumn``, ``existingCaseNo``, ``newCaseNo``) and a
            ``"tables"`` dict mapping table name -> {column name: override value}.

    Returns:
        None. Progress is printed and the rows about to be appended are displayed.
    """
    for case_config in config_list:
        mapping = case_config['caseMapping']
        key_col = mapping['keyColumn']
        existing_case = mapping['existingCaseNo']
        new_case = mapping['newCaseNo']

        for table_name, overrides in case_config['tables'].items():
            print(f"Processing table: {table_name}")

            # 1. Read the target table; skip it if it cannot be read.
            try:
                df = spark.table(table_name)
            except Exception as e:
                print(f"⚠️ Could not read table {table_name}: {e}")
                continue

            # 2. Select the rows of the case being copied.
            row_to_copy = df.filter(col(key_col) == existing_case)

            if row_to_copy.count() == 0:
                print(f"ℹ️ No row found with {key_col}={existing_case} in {table_name}")
                continue

            # 3. Apply column overrides from config.
            for col_name, value in overrides.items():
                if col_name in df.columns:
                    # Cast the override to the column's existing type so the
                    # appended rows keep the table's schema whether the config
                    # value is written as a string or as a number.
                    target_type = df.schema[col_name].dataType
                    row_to_copy = row_to_copy.withColumn(col_name, lit(value).cast(target_type))
                else:
                    print(f"⚠️ Column {col_name} does not exist in {table_name}, skipping override")

            # 4. Update the CaseNo to the new one.
            row_to_copy = row_to_copy.withColumn(key_col, lit(new_case))

            # Preview the rows that are about to be appended.
            row_to_copy.display()

            # 5. Append the copied rows to the same table.
            try:
                row_to_copy.write.format("delta") \
                    .mode("append") \
                    .saveAsTable(table_name)
                print(f"✅ Successfully appended new row to {table_name} with New CaseNo = {new_case}")
            except Exception as e:
                print(f"❌ Failed to append case : {new_case} to {table_name}: {e}")

create_data(config)

In [0]:
def delete_copy_case_nums_from_tables(config_list, suffix=""):
    """Delete copied case rows from every table named in the configuration.

    With the default ``suffix=""`` the rows whose key column equals each
    entry's ``newCaseNo`` are deleted. With a non-empty ``suffix`` (for example
    ``"/COPY"``) every row whose key column matches ``LIKE '%<suffix>'`` is
    deleted instead, and ``newCaseNo`` is only used in log output. Any ``%`` or
    ``_`` inside ``suffix`` acts as a LIKE wildcard.

    The key column is taken from ``caseMapping["keyColumn"]``, the same setting
    ``create_data`` uses, and falls back to ``"CaseNo"`` when it is absent.

    Tables that cannot be read or that have no matching rows are reported and
    skipped. A failed DELETE is reported and processing continues.

    Args:
        config_list: Iterable of dicts, each with a ``"caseMapping"`` dict and a
            ``"tables"`` dict whose keys are table names.
        suffix: Optional case-number suffix selecting the rows to delete.

    Returns:
        None. Progress is printed and the rows to be deleted are displayed.
    """
    for case_config in config_list:
        mapping = case_config['caseMapping']
        key_col = mapping.get('keyColumn', 'CaseNo')
        new_caseNo = mapping['newCaseNo']
        tables = [
            t.strip() for t in case_config['tables'].keys() if t.strip() != ""
        ]
        print(tables)

        # Build the preview filter and the matching SQL predicate once so the
        # rows shown and the rows deleted are selected by the same condition.
        # NOTE: the values are interpolated into the SQL text; a case number or
        # suffix containing a single quote would break the statement.
        if suffix == "":
            row_filter = col(key_col) == new_caseNo
            where_clause = f"{key_col} = '{new_caseNo}'"
        else:
            row_filter = col(key_col).like(f"%{suffix}")
            where_clause = f"{key_col} LIKE '%{suffix}'"

        for table_name in tables:
            print(f"Processing table: {table_name}")
            try:
                df = spark.table(table_name)
            except Exception as e:
                print(f"⚠️ Could not read table {table_name}: {e}")
                continue

            # Rows that the DELETE below is expected to remove.
            to_delete_df = df.filter(row_filter)
            count_to_delete = to_delete_df.count()

            if count_to_delete == 0:
                print(f"ℹ️ No rows to delete in {table_name}")
                continue

            # Preview rows to delete
            display(to_delete_df.select(key_col))

            # Perform deletion
            try:
                spark.sql(f"DELETE FROM {table_name} WHERE {where_clause}")
                if suffix == "":
                    print(f"✅ Deleted {count_to_delete} row(s) from {table_name} where = {new_caseNo} ")
                else:
                    print(f"✅ Deleted {count_to_delete} row(s) from {table_name} where suffix = {suffix}")
            except Exception as e:
                print(f"❌ Failed to delete rows from {table_name}: {e}")

delete_copy_case_nums_from_tables(config)
# delete_copy_case_nums_from_tables(config, suffix="/COPY2" )
 

In [0]:
# Pretty-print the configuration as JSON (json is imported in the imports cell).
# `config` ends up bound to the JSON string. The isinstance guard stops a re-run
# of this cell from JSON-encoding the already-encoded string, which would print
# one quoted string instead of the structure.
# NOTE: once this cell has run, `config` is a str; re-run the cell that defines
# the config list before calling create_data / delete_copy_case_nums_from_tables again.
if not isinstance(config, str):
    config = json.dumps(config)
print(json.dumps(json.loads(config), indent=4))