In [0]:
%python
service_credential = dbutils.secrets.get(
    scope="azure-sp",
    key="client-secret"
)

adls_configs = {
    "fs.azure.account.auth.type.deproj1adls.dfs.core.windows.net": "OAuth",
    "fs.azure.account.oauth.provider.type.deproj1adls.dfs.core.windows.net": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    "fs.azure.account.oauth2.client.id.deproj1adls.dfs.core.windows.net": "aca11b08-9a74-41fe-9d44-09d033504c53",
    "fs.azure.account.oauth2.client.secret.deproj1adls.dfs.core.windows.net": service_credential,
    "fs.azure.account.oauth2.client.endpoint.deproj1adls.dfs.core.windows.net": "https://login.microsoftonline.com/df6de6ef-8435-42ec-ad06-b4ca441bfdf4/oauth2/token"
}

silver_df = spark.read.format("delta").options(
    **adls_configs
).load(
    "abfss://silver@deproj1adls.dfs.core.windows.net/rebrickable_api/Minifigs"
)


In [0]:
from pyspark.sql.functions import *
from pyspark.sql.window import Window

# Shape the silver Minifigs rows into the gold dimension with a single
# projection: one surrogate key, the renamed source attributes, then three
# derived attributes. Output column names and order are fixed by this select.
dim_df = silver_df.select(
    # NOTE(review): monotonically_increasing_id() yields unique but
    # non-consecutive values that depend on partitioning, so keys may differ
    # between runs -- confirm nothing downstream persists them.
    monotonically_increasing_id().alias("minifig_key"),
    # Source attributes carried over under snake_case names.
    *(
        col(source_name).alias(target_name)
        for source_name, target_name in (
            ("SetNumber", "minifig_id"),
            ("Name", "minifig_name"),
            ("NumberOfParts", "num_parts"),
            ("MinifigType", "minifig_type"),
            ("ImageURL", "image_url"),
            ("SetURL", "minifig_url"),
            ("lastModifiedDatetime", "last_modified_ts"),
        )
    ),
    # Derived attributes.
    year(col("lastModifiedDatetime")).alias("modified_year"),
    length(col("Name")).alias("name_length"),
    # 1 when the upper-cased name contains any of the listed keywords, else 0.
    # NOTE(review): the pattern is an unanchored substring match, so e.g. "THOR"
    # would also hit names containing "AUTHOR" -- verify this is intended.
    when(
        upper(col("Name")).rlike("BATMAN|SUPER|CAPTAIN|SPIDER|IRON|HULK|THOR"),
        lit(1),
    ).otherwise(lit(0)).alias("is_superhero_flag"),
)


In [0]:
# Persist the minifig dimension to the gold layer as a Delta table.
#
# An explicit "overwrite" mode makes this cell re-runnable: with no mode set,
# Spark's default is to raise an error once the target path already holds data,
# so a second Run All would fail here.
# NOTE(review): a full overwrite replaces every row, including minifig_key
# values -- confirm no downstream table stores keys from a previous run.
(
    dim_df.write
    .format("delta")
    .mode("overwrite")
    .options(**adls_configs)
    .save("abfss://gold@deproj1adls.dfs.core.windows.net/rebrickable_api/Minifigs/dim_minifigs")
)
    