In [0]:
# List every secret scope available in the Databricks workspace.
# The call is the last expression of the cell, so its result is shown as the
# cell output; the scope name shown there is the one later cells read from.

dbutils.secrets.listScopes()

[SecretScope(name='adlsstgconnection')]

In [0]:
# List the secrets stored in the secret scope named 'adlsstgconnection'.
# The displayed result contains the secret keys (metadata entries); the secret
# values themselves are fetched elsewhere with dbutils.secrets.get.

dbutils.secrets.list("adlsstgconnection")

[SecretMetadata(key='appid'),
 SecretMetadata(key='appsecret'),
 SecretMetadata(key='azure-sqldb-password'),
 SecretMetadata(key='azure-sqldb-username'),
 SecretMetadata(key='onprem-system-password')]

In [0]:
# Mount the Azure Data Lake Storage (ADLS) container on the Databricks File
# System (DBFS), but only when the mount point does not exist yet.
# Authentication uses OAuth client credentials; the application id and secret
# are read from the 'adlsstgconnection' secret scope instead of being written
# in the notebook.

mount_point = "/mnt/project2-container"

if mount_point in [m.mountPoint for m in dbutils.fs.mounts()]:
    # The mount point is already registered: nothing to do.
    print(f"{mount_point} is already mounted.")
else:
    # Hadoop ABFS settings for the OAuth client-credentials login.
    configs = {
        "fs.azure.account.auth.type": "OAuth",
        "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
        "fs.azure.account.oauth2.client.id": dbutils.secrets.get(scope="adlsstgconnection", key="appid"),
        "fs.azure.account.oauth2.client.secret": dbutils.secrets.get(scope="adlsstgconnection", key="appsecret"),
        "fs.azure.account.oauth2.client.endpoint": "https://login.microsoftonline.com/78be0424-72cf-4e7f-8a71-966bd89c103b/oauth2/token",
    }

    dbutils.fs.mount(
        source="abfss://project2-container@hsinghadls.dfs.core.windows.net/",
        mount_point=mount_point,
        extra_configs=configs,
    )
    print(f"Mounted successfully at {mount_point}")


/mnt/project2-container is already mounted.


In [0]:
# List the files and directories at the root of the mounted container
# '/mnt/project2-container'. The listing is the last expression of the cell,
# so it is displayed as the cell output.
dbutils.fs.ls("/mnt/project2-container")

[FileInfo(path='dbfs:/mnt/project2-container/Bronze_Layer/', name='Bronze_Layer/', size=0, modificationTime=1745394195000),
 FileInfo(path='dbfs:/mnt/project2-container/Gold_Layer/', name='Gold_Layer/', size=0, modificationTime=1745524456000),
 FileInfo(path='dbfs:/mnt/project2-container/Silver_Layer/', name='Silver_Layer/', size=0, modificationTime=1745523623000)]

In [0]:
# Function to generate a dynamic file path based on folder name and current date

from datetime import datetime

def get_file_path(folder_name, run_date=None, base_path="/mnt/project2-container"):
    """Build the date-partitioned folder path for one layer of the container.

    The result has the shape ``<base_path>/<folder_name>/<YYYY>/<MM>/<DD>/``
    and always ends with a slash, so a file name can be appended directly.

    Args:
        folder_name: Name of the folder under ``base_path``
            (for example ``"Bronze_Layer"``).
        run_date: ``date`` or ``datetime`` that selects the partition. When
            omitted, the current local date (``datetime.now()``) is used, which
            is the behaviour callers had before this parameter existed. Pass an
            explicit value for backfills or to keep one date across a run that
            crosses midnight.
        base_path: Root under which the layer folders live. Defaults to the
            mount point used by this notebook; a trailing slash is ignored.

    Returns:
        The folder path as a string.
    """
    if run_date is None:
        run_date = datetime.now()

    # Zero-padded year/month/day segments, e.g. "2025/04/23".
    date_part = run_date.strftime("%Y/%m/%d")
    root = base_path.rstrip("/")

    return f"{root}/{folder_name}/{date_part}/"

In [0]:
# Function to read data from ADLS at the given path and format using Spark


def get_adls_data(path, file_format, file_name):
    """Load ``path + file_name`` into a Spark DataFrame.

    Args:
        path: Folder part of the location; it is concatenated with
            ``file_name`` as-is, so it must already end with a separator.
        file_format: Format name handed to the Spark reader (e.g. ``"csv"``).
        file_name: Name appended to ``path``.

    Returns:
        Whatever the Spark reader's ``load`` returns for that location.
    """
    reader = spark.read.format(file_format)
    # The header option is always set to True, whatever the format is.
    reader = reader.option("header", True)
    return reader.load(path + file_name)

In [0]:
# Function to write a Spark DataFrame to ADLS in the given file format and mode

def put_adls_data(path, file_format, df, file_name, mode):
    """Write ``df`` to ``path + file_name`` and hand the same DataFrame back.

    Args:
        path: Folder part of the target; it is concatenated with ``file_name``
            as-is, so it must already end with a separator.
        file_format: Format name handed to the DataFrame writer.
        df: DataFrame to persist.
        file_name: Name appended to ``path``.
        mode: Save mode handed to the DataFrame writer.

    Returns:
        The ``df`` argument, unchanged, so calls can be chained.
    """
    writer = df.write.format(file_format)
    writer = writer.mode(mode)
    writer.save(path + file_name)
    return df

In [0]:
# Function to list the CSV file names found in a Bronze layer folder

def list_csv_files_in_bronze(bronze_path):
    """Return the names of the CSV files directly inside ``bronze_path``.

    Only entries whose name ends with ``.csv`` (case-insensitive) are kept, so
    sub-directories, marker files and other formats sitting in the same folder
    are not passed on to code that expects CSV input.

    Args:
        bronze_path: Folder to list; it is passed to ``dbutils.fs.ls`` as-is.

    Returns:
        A list of file names (names only, not full paths), in the order the
        listing returned them. The list is empty when nothing matches.
    """
    return [
        entry.name
        for entry in dbutils.fs.ls(bronze_path)
        # Empty names are skipped, as before; the suffix test does the CSV filtering.
        if entry.name and entry.name.lower().endswith(".csv")
    ]

# Build the Bronze layer path for the current date, list it, and keep the
# resulting names in `files`; the list is printed for inspection.
files = list_csv_files_in_bronze(get_file_path("Bronze_Layer"))
print(files)

['accounts.csv', 'customers.csv', 'loan_payments.csv', 'loans.csv', 'transactions.csv']
