In [0]:
import os

def _require_env(name):
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or empty. Failing here avoids
            silently formatting ``None`` into the OAuth token endpoint URL or
            handing a null credential to Spark.
    """
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(f"Required environment variable {name!r} is not set")
    return value


# Service-principal credentials are read from the environment so that no
# secret is stored in the notebook itself.
client_id = _require_env('client_id')
tenant_id = _require_env('tenant_id')
client_secret = _require_env('secret_value')
storage_account = "project1azure1"

# Every ABFS OAuth setting below is keyed by this account's DFS endpoint host.
_abfs_host = f"{storage_account}.dfs.core.windows.net"

# Configure OAuth client-credentials authentication for the storage account.
spark.conf.set(f"fs.azure.account.auth.type.{_abfs_host}", "OAuth")
spark.conf.set(
    f"fs.azure.account.oauth.provider.type.{_abfs_host}",
    "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
)
spark.conf.set(f"fs.azure.account.oauth2.client.id.{_abfs_host}", client_id)
spark.conf.set(f"fs.azure.account.oauth2.client.secret.{_abfs_host}", client_secret)
spark.conf.set(
    f"fs.azure.account.oauth2.client.endpoint.{_abfs_host}",
    f"https://login.microsoftonline.com/{tenant_id}/oauth2/token",
)

In [0]:
from pyspark.sql.functions import (
    col, hour, minute, lpad, concat_ws, when
)
from pyspark.sql.types import IntegerType, StringType

# Source: silver-layer sales events. The account name comes from
# `storage_account` (set in the auth cell) so the path cannot drift from the
# account the OAuth settings were registered for.
events_df = spark.read.format("delta").load(
    f"abfss://silver@{storage_account}.dfs.core.windows.net/sales/events"
)

# Time dimension: one row per distinct (Hour, Minute) present in the events.
time_df = (
    events_df
    # Hour / Minute columns
    .select(
        hour(col("timestamp")).alias("Hour"),
        minute(col("timestamp")).alias("Minute"),
    )
    # hour()/minute() yield NULL for missing or unparseable timestamps; drop
    # those rows so the dimension never gets a NULL-keyed row that would
    # otherwise fall through to the "Night" / 0 defaults below.
    .dropna(subset=["Hour", "Minute"])
    .distinct()
    # TimeKey column: integer HHMM (03:57 -> 357, 22:53 -> 2253). Computed
    # arithmetically; equivalent to zero-padding, concatenating and casting.
    .withColumn("TimeKey", (col("Hour") * 100 + col("Minute")).cast(IntegerType()))
    # TimeOfDay column: 06-11 Morning, 12-15 Afternoon, 16-19 Evening,
    # everything else Night. between() is inclusive on both ends.
    .withColumn(
        "TimeOfDay",
        when(col("Hour").between(6, 11), "Morning")
        .when(col("Hour").between(12, 15), "Afternoon")
        .when(col("Hour").between(16, 19), "Evening")
        .otherwise("Night"),
    )
    # IsBussinessHours column (spelling kept as-is: it is the published column
    # name). 1 for hours 09 through 18 inclusive, else 0.
    # NOTE(review): the inclusive upper bound means 18:00-18:59 counts as
    # business hours - confirm that is intended.
    .withColumn(
        "IsBussinessHours",
        when(col("Hour").between(9, 18), 1).otherwise(0),
    )
)

time_df.display()



Hour,Minute,TimeKey,TimeOfDay,IsBussinessHours
3,57,357,Night,0
7,55,755,Morning,0
22,53,2253,Night,0
15,26,1526,Afternoon,1
15,14,1514,Afternoon,1
3,22,322,Night,0
8,52,852,Morning,0
17,33,1733,Evening,1
5,49,549,Night,0
22,33,2233,Night,0


In [0]:
# Persist the time dimension to the gold layer as a Delta table, replacing any
# previous contents. The account name is taken from `storage_account` (set in
# the auth cell) so the target always matches the account that was configured
# for OAuth, instead of repeating the literal.
(
    time_df.write
    .format("delta")
    .mode("overwrite")
    .save(f"abfss://gold@{storage_account}.dfs.core.windows.net/dim_time")
)
