In [0]:
# List the secret scopes available to this notebook (sanity check before mounting).
dbutils.secrets.listScopes()

In [0]:
# List the secret keys in the 'ingest00-meta002-sbox' scope; the mount cells
# further down read their SAS tokens from this same scope.
dbutils.secrets.list(scope='ingest00-meta002-sbox')

In [0]:
# Mount the 'raw' blob container at /mnt/raw (unless it is already mounted)
# and list one folder inside it as a smoke test.
container_name = 'raw'
storage_account_name = 'ingest00rawsbox'

# Secret keys (in scope 'ingest00-meta002-sbox') holding the SAS token used for the mount
required_secrets = [
    "RAW-SAS-TOKEN"
]

for secret in required_secrets:
    foldername = 'ARIADM'           # secret.split('-')[0]
    mount_point = f"/mnt/{container_name}"
    source_url = f"wasbs://{container_name}@{storage_account_name}.blob.core.windows.net/"

    # Collect the mount points that currently exist. Entries are read via the
    # attribute when they are tuples, otherwise via the 'mountPoint' key.
    mounted_points = {
        (mount_info.mountPoint if isinstance(mount_info, tuple) else mount_info['mountPoint']).rstrip('/')
        for mount_info in dbutils.fs.mounts()
    }

    # Compare whole paths: a substring test would treat e.g. '/mnt/raw_archive'
    # as proof that '/mnt/raw' is mounted and wrongly skip the mount.
    if mount_point not in mounted_points:
        try:
            # The SAS token is fetched inside the try so a missing secret is
            # reported like any other mount failure instead of aborting the cell.
            sas_token = dbutils.secrets.get(scope='ingest00-meta002-sbox', key=secret)
            dbutils.fs.mount(
                source=source_url,
                mount_point=mount_point,
                extra_configs={
                    f"fs.azure.sas.{container_name}.{storage_account_name}.blob.core.windows.net": sas_token
                }
            )
            print(f"Container '{container_name}' mounted successfully at '{mount_point}'")
        except Exception as e:
            # Best effort: report and fall through so the listing below shows the actual state.
            print(f"Error mounting container at '{mount_point}': {e}")
    else:
        print(f"Mount point '{mount_point}' already exists.")

    # Folder inside the mount that is listed as a verification step
    folder_path = f"{mount_point}/{foldername}"

    # List files in the folder to verify the mount is usable
    try:
        files = dbutils.fs.ls(folder_path)
        display(files)
        print(f"Folder '{foldername}' accessed successfully at '{folder_path}'")
    except Exception as e:
        print(f"Error accessing folder '{foldername}': {e}")


In [0]:
# Mount the 'landing' blob container at /mnt/landing (unless it is already
# mounted) and list the mount root as a smoke test.
container_name = 'landing'
storage_account_name = 'ingest00landingsbox'

# Secret keys (in scope 'ingest00-meta002-sbox') holding the SAS token used for the mount
required_secrets = [
    "LANDING-SAS-TOKEN"
]
for secret in required_secrets:
    foldername = secret.split('-')[0]
    mount_point = f"/mnt/{container_name}"
    # Build the source URL for THIS container/account. Previously this cell never
    # assigned source_url, so it reused whatever an earlier cell left behind (the
    # raw container URL) or raised NameError when run on a fresh kernel.
    source_url = f"wasbs://{container_name}@{storage_account_name}.blob.core.windows.net/"

    # Collect the mount points that currently exist. Entries are read via the
    # attribute when they are tuples, otherwise via the 'mountPoint' key.
    mounted_points = {
        (mount_info.mountPoint if isinstance(mount_info, tuple) else mount_info['mountPoint']).rstrip('/')
        for mount_info in dbutils.fs.mounts()
    }

    # Compare whole paths: a substring test would treat e.g. '/mnt/landing_old'
    # as proof that '/mnt/landing' is mounted and wrongly skip the mount.
    if mount_point not in mounted_points:
        try:
            # The SAS token is fetched inside the try so a missing secret is
            # reported like any other mount failure instead of aborting the cell.
            sas_token = dbutils.secrets.get(scope='ingest00-meta002-sbox', key=secret)
            dbutils.fs.mount(
                source=source_url,
                mount_point=mount_point,
                extra_configs={
                    f"fs.azure.sas.{container_name}.{storage_account_name}.blob.core.windows.net": sas_token
                }
            )
            print(f"Container '{container_name}' mounted successfully at '{mount_point}'")
        except Exception as e:
            # Best effort: report and fall through so the listing below shows the actual state.
            print(f"Error mounting container at '{mount_point}': {e}")
    else:
        print(f"Mount point '{mount_point}' already exists.")

    # The mount root itself is listed here (foldername is only used in the messages)
    folder_path = f"{mount_point}"

    # List files at the mount root to verify the mount is usable
    try:
        files = dbutils.fs.ls(folder_path)
        display(files)
        print(f"Folder '{foldername}' accessed successfully at '{folder_path}'")
    except Exception as e:
        print(f"Error accessing folder '{foldername}': {e}")


In [0]:
# Mount the gold/silver/bronze containers of the curated storage account at
# /mnt/<container> (unless already mounted) and list each mount root.
storage_account_name = "ingest00curatedsbox"

# List of containers
containers = ["gold", "silver", "bronze"]
foldername = 'ARIADM'

# Corresponding secrets (must be in the same order as containers)
secrets = ["GOLD-SAS-TOKEN", "SILVER-SAS-TOKEN", "BRONZE-SAS-TOKEN"]

# Pair each container with its SAS-token secret key
for container_name, secret_key in zip(containers, secrets):
    mount_point = f"/mnt/{container_name}"
    source_url = f"wasbs://{container_name}@{storage_account_name}.blob.core.windows.net/"

    # Collect the mount points that currently exist. Entries are read via the
    # attribute when they are tuples, otherwise via the 'mountPoint' key.
    # Re-queried on every iteration because the previous iteration may have mounted.
    mounted_points = {
        (mount_info.mountPoint if isinstance(mount_info, tuple) else mount_info["mountPoint"]).rstrip("/")
        for mount_info in dbutils.fs.mounts()
    }

    # Compare whole paths: a substring test would treat e.g. '/mnt/gold_v2'
    # as proof that '/mnt/gold' is mounted and wrongly skip the mount.
    if mount_point not in mounted_points:
        try:
            # The SAS token is fetched inside the try so a missing secret is
            # reported like any other mount failure instead of aborting the loop.
            sas_token = dbutils.secrets.get(scope="ingest00-meta002-sbox", key=secret_key)
            dbutils.fs.mount(
                source=source_url,
                mount_point=mount_point,
                extra_configs={
                    f"fs.azure.sas.{container_name}.{storage_account_name}.blob.core.windows.net": sas_token
                }
            )
            print(f"Container '{container_name}' mounted successfully at '{mount_point}'")
        except Exception as e:
            # Best effort: report and continue with the remaining containers.
            print(f"Error mounting container '{container_name}' at '{mount_point}': {e}")
    else:
        print(f"Mount point '{mount_point}' already exists.")

    # NOTE(review): the mount root is listed here, yet the messages below name
    # foldername ('ARIADM'). Confirm whether folder_path should include foldername.
    folder_path = f"{mount_point}"

    # List files at the mount root to verify the mount is usable
    try:
        files = dbutils.fs.ls(folder_path)
        display(files)
        print(f"Folder '{foldername}' accessed successfully at '{folder_path}'")
    except Exception as e:
        print(f"Error accessing folder '{foldername}': {e}")


In [0]:
# End the notebook run here and hand "Success" back to the caller; the appendix
# cells after this point are meant to be run manually.
dbutils.notebook.exit("Success")

## APPENDIX

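Auto Loader: ad-hoc cells for testing ingestion of `.rsp` response files from `/mnt/landing/IU_Original`. Run these manually; the notebook exits before reaching this appendix.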

In [0]:
# Show the contents of the IU_Original folder on the landing mount.
display(dbutils.fs.ls("/mnt/landing/IU_Original"))


In [0]:
%python
# Locations used by the Auto Loader test under the landing mount
target_table = "dbfs:/mnt/landing/IU_Original/target/"
checkpoint_location = "dbfs:/mnt/landing/IU_Original/schema/"

# Reset the test output by deleting the target directory recursively.
# NOTE(review): checkpoint_location is defined but not cleared here; if a full
# reprocess is intended the checkpoint probably needs removing too - confirm.
dbutils.fs.rm(target_table, True)

In [0]:
# Write one sample response file into the Auto Loader source folder,
# overwriting any existing file with the same name.
sample_rsp_path = "dbfs:/mnt/landing/IU_Original/response/appeals_1_1cf6f5b1b00ed2b52c23c83afe48efe3_1_iu.rsp"
sample_rsp_body = """
{"operation": "input_upload", "timestamp": "2023-02-16T12:59:15.884621", "status": 1, "exception_description": null, "error_status": null, "filename": "_HP2_manifest", "submission_folder": "/dropzone/BOOKSDB/submission", "file_hash": "1cf6f5b1b00ed2b52c23c83afe48efe3"}
"""
dbutils.fs.put(sample_rsp_path, sample_rsp_body, True)


In [0]:
from pyspark.sql.types import StructType, StructField, StringType, TimestampType, IntegerType
from pyspark.sql.functions import col

# Explicit schema for the JSON ".rsp" response files (one field per JSON key)
schema = StructType([
    StructField("operation", StringType(), True),
    StructField("timestamp", TimestampType(), True),
    StructField("status", IntegerType(), True),
    StructField("exception_description", StringType(), True),
    StructField("error_status", StringType(), True),
    StructField("filename", StringType(), True),
    StructField("submission_folder", StringType(), True),
    StructField("file_hash", StringType(), True)
])

# Paths, all under the landing mount
source_directory = "dbfs:/mnt/landing/IU_Original/response/"
schema_location = "dbfs:/mnt/landing/IU_Original/schema/"
# NOTE(review): the checkpoint shares the schema directory; kept as-is so an
# existing checkpoint stays valid. Consider a dedicated checkpoint folder.
checkpoint_location = "dbfs:/mnt/landing/IU_Original/schema/"
target_table = "dbfs:/mnt/landing/IU_Original/target/"

# Auto Loader options.
# The file-name filter is passed as `pathGlobFilter`. The previous key,
# `cloudFiles.fileNamePattern`, is not an Auto Loader option and was silently
# dropped because option validation is disabled below, so no filter was applied.
autoloader_config = {
    "cloudFiles.format": "json",
    "cloudFiles.schemaLocation": schema_location,
    "pathGlobFilter": "*.rsp",  # use "*_iu.rsp" to restrict to files ending in _iu.rsp
    "cloudFiles.includeExistingFiles": "true",
    # NOTE(review): validation is left disabled to preserve current behaviour;
    # re-enabling it would surface mistyped option names as errors.
    "cloudFiles.validateOptions": "false",
    "cloudFiles.useNotifications": "false"  # directory-listing mode, no notifications
}


def batch_process():
    """Ingest the available .rsp files from the source directory into the Delta target.

    Reads `source_directory` with Auto Loader (format "cloudFiles") using the
    module-level `autoloader_config` and explicit `schema`, adds the source file
    path and modification time as `_FILE_PATH` / `_FILE_MODIFICATION_TIME`, and
    appends the rows to the Delta location `target_table`. The stream uses the
    `availableNow` trigger and this function blocks until the query terminates.
    """
    df = (
        spark.readStream
        .format("cloudFiles")
        .options(**autoloader_config)
        .schema(schema)  # explicit schema
        .load(source_directory)
        .select(
            "*",
            col("_metadata.file_path").alias("_FILE_PATH"),
            col("_metadata.file_modification_time").alias("_FILE_MODIFICATION_TIME"),
        )
    )

    query = (
        df.writeStream
        .option("checkpointLocation", checkpoint_location)
        .trigger(availableNow=True)
        .format("delta")
        .outputMode("append")
        .option("path", target_table)
        .start()
    )

    # Block until the availableNow run has finished
    query.awaitTermination()


# Run batch processing
batch_process()