In [None]:
## Parameters
# display_data: when True, the preview cells call display() on each intermediate dataframe.
# table_name:   name of the Delta table the final time dimension is saved to.
display_data = True
table_name = "time"

print("Successfully configured all parameters for this run.")

In [None]:
## Import all packages used in this notebook
# Spark entry point and row type, plus the column functions used by the transformation cells.
from pyspark.sql import SparkSession, Row
from pyspark.sql.functions import col, when, expr, date_format, hour, minute
# NOTE(review): `time` is not referenced by any cell shown here — confirm it is needed elsewhere before removing.
from datetime import datetime, timedelta, time

print("Successfully imported all packages for this notebook.")

In [None]:
#
# Create the Spark session
#
app_name = "TimeDimensionBuilder"

# Configure the builder with the application name, then get (or create) the session
session_builder = SparkSession.builder.appName(app_name)
spark = session_builder.getOrCreate()

print(f"Spark session {app_name} has been created successfully.")

In [None]:
# 🧩 Step 1: Create the Day Period Lookup Table

# Each entry is (first minute, last minute, label); both bounds are inclusive
periods = [
    ("00:00:00", "05:59:00", "Early Morning"),
    ("06:00:00", "11:59:00", "Morning"),
    ("12:00:00", "17:59:00", "Afternoon"),
    ("18:00:00", "20:59:00", "Evening"),
    ("21:00:00", "23:59:00", "Night")
]

base_date = datetime(2000, 1, 1)
anchor_day = base_date.date()
one_minute = timedelta(minutes=1)
records = []

for period_start, period_end, period_label in periods:
    # Place the wall-clock bounds on the anchor date
    start_dt = datetime.combine(anchor_day, datetime.strptime(period_start, "%H:%M:%S").time())
    end_dt = datetime.combine(anchor_day, datetime.strptime(period_end, "%H:%M:%S").time())

    # Emit one lookup record per minute from the period start through its end
    cursor = start_dt
    while cursor <= end_dt:
        records.append(
            Row(
                DayPeriodStart=start_dt,
                DayPeriodEnd=end_dt,
                DayPeriod=period_label,
                MinuteKey=cursor.strftime("%H:%M"),
            )
        )
        cursor += one_minute

day_period_df = spark.createDataFrame(records)


print("Successfully created the day period dataframe.")

In [None]:
# Preview the day-period lookup only when the display_data parameter is enabled
if display_data:
    display(day_period_df)

In [None]:
# 🧩 Step 2: Build the Time Dimension at Minute Grain

MINUTES_PER_DAY = 24 * 60

# Create one row for every minute of the day.
# The dimension is keyed at minute grain (TimeId = HHmm), so the 1,440 minutes are
# generated directly instead of building 86,400 per-second rows on the driver and
# de-duplicating them back down to one row per minute.
base_datetime = datetime(2000, 1, 1)
rows = [Row(Time=base_datetime + timedelta(minutes=i)) for i in range(MINUTES_PER_DAY)]
time_df = spark.createDataFrame(rows)

# Derive minute-level columns.
# Hour / Minute / QuarterHour / HalfHour are timestamps on the 2000-01-01 anchor date,
# truncated to the start of the bucket the minute falls into.
minute_df = (
    time_df
    .withColumn("HourNumber", hour("Time"))
    .withColumn("MinuteNumber", minute("Time"))
    .withColumn("Hour", expr("make_timestamp(2000, 1, 1, HourNumber, 0, 0)"))
    .withColumn("Minute", expr("make_timestamp(2000, 1, 1, HourNumber, MinuteNumber, 0)"))
    .withColumn("QuarterHour", expr("""
        CASE
            WHEN MinuteNumber < 15 THEN make_timestamp(2000,1,1,HourNumber,0,0)
            WHEN MinuteNumber < 30 THEN make_timestamp(2000,1,1,HourNumber,15,0)
            WHEN MinuteNumber < 45 THEN make_timestamp(2000,1,1,HourNumber,30,0)
            ELSE make_timestamp(2000,1,1,HourNumber,45,0)
        END
    """))
    .withColumn("HalfHour", expr("""
        CASE
            WHEN MinuteNumber < 30 THEN make_timestamp(2000,1,1,HourNumber,0,0)
            ELSE make_timestamp(2000,1,1,HourNumber,30,0)
        END
    """))
    # Integer key in HHmm form (e.g. 09:05 -> 905)
    .withColumn("TimeId", date_format("Time", "HHmm").cast("int"))
    # "HH:mm" string used as the join key against the day-period lookup
    .withColumn("MinuteKey", date_format("Time", "HH:mm"))
    .select(
        "TimeId",
        "MinuteNumber",
        "Minute",
        "QuarterHour",
        "HalfHour",
        "HourNumber",
        "Hour",
        "MinuteKey"
    )
)


print("Successfully created the minute-grain dataframe.")

In [None]:
# Preview the minute-grain dataframe only when the display_data parameter is enabled
if display_data:
    display(minute_df)

In [None]:
# 🧩 Step 3: Join Day Period to Time Table

# Join and enrich.
# Both dataframes already carry the "HH:mm" MinuteKey column, so no key preparation
# is needed (and day_period_df from the earlier cell is left untouched). The key is
# only a join helper and is dropped from the result.
joined_df = minute_df.join(day_period_df, on="MinuteKey", how="left").drop("MinuteKey")

# Add AM/PM and business hour flags
# NOTE(review): the business-hour test is inclusive of hour 17, so 17:00-17:59 is
# flagged as a business hour — confirm this matches the intended definition.
final_df = (
    joined_df
    .withColumn("IsAM", when(hour("Minute") < 12, True).otherwise(False))
    .withColumn("IsBusinessHour", when((hour("Minute") >= 9) & (hour("Minute") <= 17), True).otherwise(False))
)


print("Successfully created the final time dataframe by joining the day-period and minute-grained.")

In [None]:
# Preview the final joined dataframe only when the display_data parameter is enabled
if display_data:
    display(final_df)

In [None]:
# 🧩 Step 4: Save to Microsoft Fabric Lakehouse
# Overwrites any existing table of the same name with the freshly built dimension.
final_df.write \
    .format("delta") \
    .mode("overwrite") \
    .saveAsTable(table_name)

# Count rows from the saved table rather than from final_df: final_df is not cached,
# so final_df.count() would re-run the whole build (including the join), and it would
# report the in-memory dataframe instead of what was actually written.
saved_row_count = spark.table(table_name).count()

print(f"Successfully created the table {table_name} in the lakehouse with {saved_row_count} rows.")