##Details for Connection Through Service Principal

In [0]:
# Service principal credentials are read from the "my-scope" Databricks secret
# scope so that nothing sensitive is hardcoded in the notebook.
client_ID, client_secret, tenant_id = (
    dbutils.secrets.get(scope="my-scope", key=secret_key)
    for secret_key in ("client-id", "client-secret", "tenant-id")
)

# OAuth client-credentials settings for the ABFS driver; the mount cells
# below pass this dict as `extra_configs`.
configs = {
    "fs.azure.account.auth.type": "OAuth",
    "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    "fs.azure.account.oauth2.client.id": client_ID,
    "fs.azure.account.oauth2.client.secret": client_secret,
    # The token endpoint is tenant-specific, so the tenant id is interpolated.
    "fs.azure.account.oauth2.client.endpoint": f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
}

##Mounting the bronze container

In [0]:

# Mount the bronze container at /mnt/bronze using the service principal
# settings in `configs`.
# dbutils.fs.mount raises if the mount point is already in use, so check the
# existing mounts first; this keeps the cell safe to re-run.
if any(mount.mountPoint == "/mnt/bronze" for mount in dbutils.fs.mounts()):
    print("/mnt/bronze is already mounted; skipping mount")
else:
    dbutils.fs.mount(
        source = "abfss://bronze@destorageaccount11.dfs.core.windows.net/",
        mount_point = "/mnt/bronze",
        extra_configs = configs
    )
    print("Successfully mounted /mnt/bronze")

Successfully mounted /mnt/bronze


##Mounting the silver container

In [0]:

# Mount the silver container at /mnt/silver using the service principal
# settings in `configs`.
try:
    # An existing mount is not an error: skip it instead of letting
    # dbutils.fs.mount raise and be reported as a failure on re-runs.
    if any(mount.mountPoint == "/mnt/silver" for mount in dbutils.fs.mounts()):
        print("/mnt/silver is already mounted; skipping mount")
    else:
        dbutils.fs.mount(
            source = "abfss://silver@destorageaccount11.dfs.core.windows.net/",
            mount_point = "/mnt/silver",
            extra_configs = configs
        )
        print("Successfully mounted /mnt/silver")
except Exception as e:
    # Best-effort: the failure is reported but not re-raised, so later cells
    # will still run even if the mount is missing.
    print(f"Error mounting /mnt/silver: {e}")

Successfully mounted /mnt/silver


##Mounting the gold container

In [0]:

# Mount the gold container at /mnt/gold using the service principal settings
# in `configs`.
# NOTE(review): the container name "gold" follows the bronze/silver naming
# used by the other mounts — confirm it matches the storage account.
# Skip the mount when /mnt/gold already exists so the cell can be re-run.
if any(mount.mountPoint == "/mnt/gold" for mount in dbutils.fs.mounts()):
    print("/mnt/gold is already mounted; skipping mount")
else:
    dbutils.fs.mount(
        source = "abfss://gold@destorageaccount11.dfs.core.windows.net/",
        mount_point = "/mnt/gold",
        extra_configs = configs
    )
    print("Successfully mounted /mnt/gold")
# Refresh mounts to propagate changes
dbutils.fs.refreshMounts()

Successfully mounted /mnt/gold
Mounts successfully refreshed.


True

##Transforming the data and saving it in the silver container.

In [0]:
from pyspark.sql.functions import col, date_format

# Enumerate every entry directly under the bronze mount
bronze_files = dbutils.fs.ls("/mnt/bronze/")

# Each entry is handled independently: a failure is reported and the loop
# moves on to the next entry.
for file_info in bronze_files:
    file_path = file_info.path
    try:
        # Load the bronze entry as Parquet
        df = spark.read.parquet(file_path)

        # Only a column named exactly 'date' or 'Date' is reformatted;
        # 'date' takes precedence when both exist.
        date_col = next(
            (name for name in ('date', 'Date') if name in df.columns),
            None,
        )
        if date_col is not None:
            df = df.withColumn(date_col, date_format(col(date_col), 'yyyy-MM-dd'))

        # Mirror the bronze path under the silver mount and overwrite it
        # with the Delta-format result
        silver_path = file_path.replace('/mnt/bronze/', '/mnt/silver/')
        df.write.format("delta").mode('overwrite').save(silver_path)
        print(f"Processed and saved: {file_path} to {silver_path} as Delta format")
    except Exception as e:
        print(f"Error processing {file_path}: {e}")

Processed and saved: dbfs:/mnt/bronze/Address.parquet to dbfs:/mnt/silver/Address.parquet as Delta format
Processed and saved: dbfs:/mnt/bronze/Customer.parquet to dbfs:/mnt/silver/Customer.parquet as Delta format
Processed and saved: dbfs:/mnt/bronze/CustomerAddress.parquet to dbfs:/mnt/silver/CustomerAddress.parquet as Delta format
Processed and saved: dbfs:/mnt/bronze/Product.parquet to dbfs:/mnt/silver/Product.parquet as Delta format
Processed and saved: dbfs:/mnt/bronze/ProductCategory.parquet to dbfs:/mnt/silver/ProductCategory.parquet as Delta format
Processed and saved: dbfs:/mnt/bronze/ProductDescription.parquet to dbfs:/mnt/silver/ProductDescription.parquet as Delta format
Processed and saved: dbfs:/mnt/bronze/ProductModel.parquet to dbfs:/mnt/silver/ProductModel.parquet as Delta format
Processed and saved: dbfs:/mnt/bronze/ProductModelProductDescription.parquet to dbfs:/mnt/silver/ProductModelProductDescription.parquet as Delta format
Error processing dbfs:/mnt/bronze/SalesL

##Listing the files in the silver container

In [0]:
# Fetch the top-level entries of the silver mount
silver_files = dbutils.fs.ls("/mnt/silver/")

# Show a header followed by one path per line
print("Files and directories in the silver container:")
silver_paths = [entry.path for entry in silver_files]
for silver_entry_path in silver_paths:
    print(silver_entry_path)

Files and directories in the silver container:
dbfs:/mnt/silver/Address.parquet/
dbfs:/mnt/silver/Customer.parquet/
dbfs:/mnt/silver/CustomerAddress.parquet/
dbfs:/mnt/silver/Product.parquet/
dbfs:/mnt/silver/ProductCategory.parquet/
dbfs:/mnt/silver/ProductDescription.parquet/
dbfs:/mnt/silver/ProductModel.parquet/
dbfs:/mnt/silver/ProductModelProductDescription.parquet/
dbfs:/mnt/silver/SalesLT/
dbfs:/mnt/silver/SalesOrderDetail.parquet/
dbfs:/mnt/silver/SalesOrderHeader.parquet/
