In [0]:
from pyspark.sql.functions import explode, sequence, to_date
from pyspark.sql.functions import min, max


## Find the minimum and maximum dates in the Trip fact table

In [0]:
# Determine the calendar span: the earliest `start_at` and the latest `ent_at`
# present in TRIP_FACT. `min`/`max` here are the pyspark.sql.functions
# aggregates imported at the top of the notebook, not the Python builtins.
# NOTE(review): `ent_at` may be a typo for `end_at` — confirm against the
# TRIP_FACT schema before changing it.
day_df = spark.sql('SELECT start_at,ent_at FROM TRIP_FACT')
beginDate, endDate = day_df.select(min('start_at'), max('ent_at')).first()

# Aggregating an empty (or all-NULL) table returns None for both bounds. Without
# this guard the f-string below would embed the literal text 'None' in the SQL,
# which is not a valid date, so fail loudly instead of building a bogus calendar.
if beginDate is None or endDate is None:
    raise ValueError(
        "TRIP_FACT has no non-null start_at/ent_at values; "
        "cannot derive the calendar date range"
    )

# Expand the inclusive [beginDate, endDate] range into one row per day and
# expose it to later cells as the temporary view `dates`.
spark.sql(f"select explode(sequence(to_date('{beginDate}'), to_date('{endDate}'), interval 1 day)) as calendarDate").createOrReplaceTempView('dates')

In [0]:
%sql
-- Preview the generated one-row-per-day `dates` view.
select * from dates

calendarDate
2021-02-01
2021-02-02
2021-02-03
2021-02-04
2021-02-05
2021-02-06
2021-02-07
2021-02-08
2021-02-09
2021-02-10


## Calendar dimension

In [0]:
# Build the calendar dimension from the `dates` view: one row per day with its
# year, month name, month number, day-of-week number and quarter.
# Adjacent string literals are concatenated by Python, so the pieces below form
# a single SQL statement without relying on backslash line continuations.
calendar_query = (
    "select "
    "      CalendarDate AS Date,"
    "      year(calendarDate) AS Year,"
    "      date_format(calendarDate, 'MMMM') as CalendarMonth, "
    "      month(calendarDate) as Month,"
    "      dayofweek(calendarDate) AS WeekDay,"
    "      quarter(calendarDate) as Quarter"
    "  from dates"
    "  order by calendarDate"
)
calendar_df = spark.sql(calendar_query)

# Show the resulting column types, then render the rows in the notebook.
calendar_df.printSchema()
display(calendar_df)

Date,Year,CalendarMonth,Month,WeekDay,Quarter
2021-02-01,2021,February,2,2,1
2021-02-02,2021,February,2,3,1
2021-02-03,2021,February,2,4,1
2021-02-04,2021,February,2,5,1
2021-02-05,2021,February,2,6,1
2021-02-06,2021,February,2,7,1
2021-02-07,2021,February,2,1,1
2021-02-08,2021,February,2,2,1
2021-02-09,2021,February,2,3,1
2021-02-10,2021,February,2,4,1


In [0]:
# Persist the calendar dimension as a Delta dataset, replacing whatever is
# currently stored at the target path.
(
    calendar_df.write
    .format("delta")
    .mode("overwrite")
    .save("/delta/gold/calendar_dimension")
)


In [0]:
# Register the Delta files written to the gold path as the CALENDAR_DIMENSION
# table; the statement is a no-op when the table already exists.
spark.sql(
    "CREATE TABLE IF NOT EXISTS CALENDAR_DIMENSION "
    "USING DELTA "
    "LOCATION '/delta/gold/calendar_dimension'"
)