In [0]:
spark.conf.get("spark.databricks.passthrough.enabled")

'true'

In [0]:
# Define a configuration dictionary for mounting Azure Data Lake Storage Gen2 (ADLS Gen2).
# These configurations are necessary to authenticate and connect to ADLS Gen2
# using Azure Active Directory (Azure AD) credential passthrough.
configs = {
  # Specifies the authentication type for accessing Azure Data Lake Storage.
  # "CustomAccessToken" indicates that a custom token provider will be used.
  # For credential passthrough, Databricks uses a specific mechanism to generate
  # and manage these tokens based on the user's Azure AD identity.
  "fs.azure.account.auth.type": "CustomAccessToken",

  # Specifies the class that provides the custom access token for ADLS Gen2.
  # spark.conf.get("spark.databricks.passthrough.adls.gen2.tokenProviderClassName")
  # dynamically retrieves the name of the token provider class that is configured
  # at the cluster level for ADLS Gen2 credential passthrough.
  # This ensures that the mount uses the correct, cluster-configured mechanism
  # for secure token generation, enhancing security and maintainability.
  "fs.azure.account.custom.token.provider.class": spark.conf.get("spark.databricks.passthrough.adls.gen2.tokenProviderClassName")
}

# --- Unmount existing mount points if they exist ---
# This section ensures that the script can be run multiple times without failing due to
# pre-existing mount points. Attempting to mount to a path that is already a mount point
# will result in an error. Unmounting first provides a clean state.

# Check if a mount point already exists at "/mnt/bronze".
# dbutils.fs.mounts() returns a list of all active mount points in the Databricks File System (DBFS).
# The list comprehension iterates through these mounts and checks if any `mountPoint` attribute
# matches "/mnt/bronze".
if any(mount.mountPoint == "/mnt/bronze" for mount in dbutils.fs.mounts()):
  # If the mount point "/mnt/bronze" exists, unmount it.
  # dbutils.fs.unmount() removes the specified mount point from DBFS.
  # This is crucial for idempotency, allowing the script to be re-run reliably.
  dbutils.fs.unmount("/mnt/bronze")

# Check if a mount point already exists at "/mnt/silver".
# Similar to the "/mnt/bronze" check, this verifies if "/mnt/silver" is already mounted.
if any(mount.mountPoint == "/mnt/silver" for mount in dbutils.fs.mounts()):
  # If the mount point "/mnt/silver" exists, unmount it.
  # This prevents errors if the script is run again after a successful previous execution
  # or if the mount point was created manually.
  dbutils.fs.unmount("/mnt/silver")

# Check if a mount point already exists at "/mnt/gold".
# This performs the same check for the "/mnt/gold" mount point.
if any(mount.mountPoint == "/mnt/gold" for mount in dbutils.fs.mounts()):
  # If the mount point "/mnt/gold" exists, unmount it.
  # This ensures a clean setup for the subsequent mount operation.
  dbutils.fs.unmount("/mnt/gold")

# --- Mount the ADLS Gen2 containers to DBFS ---
# Mounting makes ADLS Gen2 storage accessible as if it were part of the local file system
# within Databricks (DBFS). This simplifies file access paths in notebooks and jobs.
# The `extra_configs = configs` part enables credential passthrough, meaning that
# access to the data lake will be governed by the Azure AD identity of the user
# running the notebook or job, enhancing security.

# Mount the 'bronze' container from ADLS Gen2 to "/mnt/bronze" in DBFS.
# The 'bronze' layer typically stores raw, ingested data.
# "abfss://" is the scheme for Azure Blob File System, which is used for ADLS Gen2.
# "bronze@datalakedeproj.dfs.core.windows.net/" is the URI for the 'bronze' container
# within the 'datalakedeproj' storage account.
# "/mnt/bronze" is the local path in DBFS where the 'bronze' container's contents will be accessible.
# The `extra_configs` argument applies the previously defined authentication settings,
# enabling credential passthrough.
# The comment "# Optionally, add <directory-name> to the source URI of your mount point."
# indicates that you could also mount a specific sub-directory within the container
# e.g., "abfss://bronze@datalakedeproj.dfs.core.windows.net/specific_folder/".
dbutils.fs.mount(
  source = "abfss://bronze@datalakedeproj.dfs.core.windows.net/", # Source URI of the ADLS Gen2 container
  mount_point = "/mnt/bronze",                                  # DBFS path to mount to
  extra_configs = configs                                       # Configurations for authentication (credential passthrough)
)

# Mount the 'silver' container from ADLS Gen2 to "/mnt/silver" in DBFS.
# The 'silver' layer typically stores data that has undergone some cleaning,
# transformation, or enrichment from the 'bronze' layer.
# The parameters follow the same pattern as the 'bronze' mount.
dbutils.fs.mount(
  source = "abfss://silver@datalakedeproj.dfs.core.windows.net/", # Source URI of the ADLS Gen2 container
  mount_point = "/mnt/silver",                                  # DBFS path to mount to
  extra_configs = configs                                       # Configurations for authentication
)

# Mount the 'gold' container from ADLS Gen2 to "/mnt/gold" in DBFS.
# The 'gold' layer typically stores highly refined, aggregated data ready for
# analytics, reporting, or consumption by downstream applications.
# The parameters follow the same pattern as the 'bronze' and 'silver' mounts.
dbutils.fs.mount(
  source = "abfss://gold@datalakedeproj.dfs.core.windows.net/",   # Source URI of the ADLS Gen2 container
  mount_point = "/mnt/gold",                                    # DBFS path to mount to
  extra_configs = configs                                       # Configurations for authentication
)

# After these commands are executed, data within the 'bronze', 'silver', and 'gold' containers
# in the 'datalakedeproj' ADLS Gen2 account will be accessible via DBFS paths
# "/mnt/bronze", "/mnt/silver", and "/mnt/gold" respectively.
# Access permissions will be determined by the Azure AD identity of the user executing commands,
# thanks to credential passthrough.

True

In [0]:
# --- List the contents of the mounted ADLS Gen2 containers ---
# These commands use the Databricks utility `dbutils.fs.ls()` to list files and directories
# within the specified DBFS paths. Since '/mnt/bronze', '/mnt/silver', and '/mnt/gold'
# were previously set up as mount points for ADLS Gen2 containers, these commands
# effectively list the contents of those respective containers.
# This is useful for verifying data, debugging, or understanding the structure of the data lake layers.

# List the contents of the '/mnt/bronze' directory.
# `dbutils.fs.ls()` is a Databricks File System utility that lists the files and directories
# at the given path.
# '/mnt/bronze' is the mount point for the 'bronze' ADLS Gen2 container, which typically
# stores raw data ingested from source systems.
# Why: This command is used to inspect the raw data that has landed in the bronze layer.
# It helps to:
#   - Verify that data ingestion processes have completed successfully.
#   - Check the names, sizes, and modification times of files/directories.
#   - Understand the structure of the raw data (e.g., subdirectories for different tables or dates).
#   - Debug issues if expected data is missing or has incorrect formats.
dbutils.fs.ls('/mnt/bronze')

# # Define a function to recursively list all files and directories in a given DBFS path
# def recursive_list(path):
#     # Use dbutils.fs.ls() to list the contents (files and folders) at the specified path
#     files = dbutils.fs.ls(path)
    
#     # Loop through each item returned by dbutils.fs.ls()
#     for f in files:
#         # Print the full path of the item (this could be a file or a directory)
#         print(f.path)
        
#         # Check if the current item is a directory
#         if f.isDir():
#             # If it is a directory, recursively call the function to list its contents
#             recursive_list(f.path)

# # Call the function on the bronze mount point to begin the recursive listing
# # This will print out the structure of all files and subdirectories under /mnt/bronze
# recursive_list("/mnt/bronze")

# List the contents of the '/mnt/silver' directory.
# '/mnt/silver' is the mount point for the 'silver' ADLS Gen2 container. This layer
# typically holds data that has been cleaned, transformed, and conformed from the bronze layer.
# It might involve data type corrections, filtering, or minor enrichments.
# Why: This command is used to examine the processed data in the silver layer.
# It helps to:
#   - Confirm that the bronze-to-silver transformation jobs have run and produced output.
#   - Inspect the structure and format of the cleaned data (e.g., Parquet or Delta files).
#   - Verify that expected tables or datasets are present after initial processing.
#   - Aid in debugging transformation logic if the output is not as expected.
dbutils.fs.ls('/mnt/silver')

# List the contents of the '/mnt/gold' directory.
# '/mnt/gold' is the mount point for the 'gold' ADLS Gen2 container. This layer
# typically stores highly curated, aggregated, and business-ready data, often
# organized into data models suitable for analytics, reporting, or direct consumption
# by business intelligence tools or applications.
# Why: This command is used to view the final, production-ready datasets in the gold layer.
# It helps to:
#   - Ensure that the silver-to-gold aggregation and final transformation pipelines have completed.
#   - Verify the presence and structure of the datasets that will be used for business insights.
#   - Check that the data is organized correctly for consumption (e.g., specific tables or views).
#   - Provide a quick way to see what data is available for end-users or downstream systems.
dbutils.fs.ls('/mnt/gold')

# The output of each `dbutils.fs.ls()` command is typically a list of FileInfo objects,
# where each object contains details about a file or directory, such as its path, name,
# size, and modification timestamp.

dbfs:/mnt/bronze/SalesLT/
dbfs:/mnt/bronze/SalesLT/Address/
dbfs:/mnt/bronze/SalesLT/Address/Address.parquet
dbfs:/mnt/bronze/SalesLT/Customer/
dbfs:/mnt/bronze/SalesLT/Customer/Customer.parquet
dbfs:/mnt/bronze/SalesLT/CustomerAddress/
dbfs:/mnt/bronze/SalesLT/CustomerAddress/CustomerAddress.parquet
dbfs:/mnt/bronze/SalesLT/Product/
dbfs:/mnt/bronze/SalesLT/Product/Product.parquet
dbfs:/mnt/bronze/SalesLT/ProductCategory/
dbfs:/mnt/bronze/SalesLT/ProductCategory/ProductCategory.parquet
dbfs:/mnt/bronze/SalesLT/ProductDescription/
dbfs:/mnt/bronze/SalesLT/ProductDescription/ProductDescription.parquet
dbfs:/mnt/bronze/SalesLT/ProductModel/
dbfs:/mnt/bronze/SalesLT/ProductModel/ProductModel.parquet
dbfs:/mnt/bronze/SalesLT/ProductModelProductDescription/
dbfs:/mnt/bronze/SalesLT/ProductModelProductDescription/ProductModelProductDescription.parquet
dbfs:/mnt/bronze/SalesLT/SalesOrderDetail/
dbfs:/mnt/bronze/SalesLT/SalesOrderDetail/SalesOrderDetail.parquet
dbfs:/mnt/bronze/SalesLT/SalesOr

[]