In [0]:
import os

# Configure Spark to authenticate against the ADLS Gen2 account with an
# Azure AD service principal (OAuth2 client-credentials flow).
# The credentials are read from environment variables; validate them up front
# so a missing value fails here with a clear message instead of surfacing
# later as a None config value or a ".../None/oauth2/token" endpoint.
_required_env_vars = ("client_id", "tenant_id", "secret_value")
_missing_env_vars = [name for name in _required_env_vars if not os.environ.get(name)]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(_missing_env_vars)
    )

client_id = os.environ["client_id"]
tenant_id = os.environ["tenant_id"]
client_secret = os.environ["secret_value"]
storage_account = "project1azure1"

# Every setting is scoped to this one storage account via the host suffix.
_account_host = f"{storage_account}.dfs.core.windows.net"
_oauth_settings = {
    f"fs.azure.account.auth.type.{_account_host}": "OAuth",
    f"fs.azure.account.oauth.provider.type.{_account_host}":
        "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    f"fs.azure.account.oauth2.client.id.{_account_host}": client_id,
    f"fs.azure.account.oauth2.client.secret.{_account_host}": client_secret,
    f"fs.azure.account.oauth2.client.endpoint.{_account_host}":
        f"https://login.microsoftonline.com/{tenant_id}/oauth2/token",
}
for _conf_key, _conf_value in _oauth_settings.items():
    spark.conf.set(_conf_key, _conf_value)

In [0]:
from pyspark.sql.functions import (
    col, to_date, date_format,
    year, month, dayofmonth, quarter,
    dayofweek, weekofyear, when
)
from pyspark.sql.types import IntegerType, BooleanType

# Build the date dimension from the distinct calendar dates found in the
# silver-layer sales events.

# Delta tables carry their own schema; `header` is a CSV-only reader option,
# so it is not passed here. The path reuses `storage_account` from the
# configuration cell rather than repeating the account name.
df_event = spark.read.format("delta").load(
    f"abfss://silver@{storage_account}.dfs.core.windows.net/sales/events"
)

# Derive the calendar date of each event from its `timestamp` column.
events_dates_df = df_event.withColumn("event_date", to_date(col("timestamp")))

# One row per non-null date; dropping nulls first keeps them out of the distinct.
date_df = (
    events_dates_df
    .select("event_date")
    .filter(col("event_date").isNotNull())
    .distinct()
)

dim_date = (
    date_df
    # Integer surrogate key in yyyyMMdd form, e.g. 2015-05-19 -> 20150519.
    .withColumn("DateKey", date_format("event_date", "yyyyMMdd").cast(IntegerType()))
    .withColumn("Year", year(col("event_date")))
    .withColumn("Month", month(col("event_date")))
    .withColumn("Day", dayofmonth(col("event_date")))
    .withColumn("Quarter", quarter(col("event_date")))
    # dayofweek numbers days 1 (Sunday) through 7 (Saturday).
    .withColumn("DayOfWeek", dayofweek(col("event_date")))
    .withColumn("WeekOfYear", weekofyear(col("event_date")))
    # The isin predicate is already boolean, so no 1/0 flag and cast are needed.
    .withColumn("IsWeekend", dayofweek(col("event_date")).isin(1, 7))
)

display(dim_date)

event_date,DateKey,Year,Month,Day,Quarter,DayOfWeek,WeekOfYear,IsWeekend
2015-05-19,20150519,2015,5,19,2,3,21,False
2015-09-02,20150902,2015,9,2,3,4,36,False
2015-05-10,20150510,2015,5,10,2,1,19,True
2015-06-15,20150615,2015,6,15,2,2,25,False
2015-08-01,20150801,2015,8,1,3,7,31,True
2015-06-19,20150619,2015,6,19,2,6,25,False
2015-08-24,20150824,2015,8,24,3,2,35,False
2015-09-06,20150906,2015,9,6,3,1,36,True
2015-06-22,20150622,2015,6,22,2,2,26,False
2015-05-16,20150516,2015,5,16,2,7,20,True


In [0]:
# Persist the date dimension to the gold layer as a Delta table.
# Each run fully replaces the previous contents, and overwriteSchema lets the
# stored schema be replaced along with the data.
(
    dim_date.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .save("abfss://gold@project1azure1.dfs.core.windows.net/dim_date")
)