In [0]:
from datetime import datetime, timedelta

from pyspark.sql.functions import col, date_format, dayofweek, dayofyear, expr, when

# Inclusive calendar bounds of the date dimension.
start_date = datetime.strptime("1981-01-01", "%Y-%m-%d")
end_date = datetime.strptime("2050-12-31", "%Y-%m-%d")

# The +1 keeps end_date itself inside the generated range.
num_days = (end_date - start_date).days + 1

# One entry per calendar day, built on the driver before handing it to Spark.
date_list = [
    start_date + timedelta(days=offset)
    for offset in range(num_days)
]

# Single-column DataFrame; the downstream cell addresses that column as "value".
df = spark.createDataFrame(date_list, schema="date")

# Date dimension: calendar parts, season and weekday for every day in `df`.
#
# Seasons are defined by fixed calendar dates (southern hemisphere):
#   Outono    (2): 21 Mar - 20 Jun
#   Inverno   (3): 21 Jun - 22 Sep
#   Primavera (4): 23 Sep - 21 Dec
#   Verão     (1): 22 Dec - 20 Mar  (everything else)
#
# The ranges are matched on MO*100 + DY (e.g. 21 Mar -> 321) instead of DAY_OF_YEAR.
# Day-of-year numbers shift by one after 29 February in leap years, so fixed
# day-of-year cut-offs would move every season boundary one day early in those years.
# In non-leap years these ranges select exactly the same days as the previous
# day-of-year ranges (80-171, 172-265, 266-355).
month_day = col("MO") * 100 + col("DY")
is_outono = month_day.between(321, 620)
is_inverno = month_day.between(621, 922)
is_primavera = month_day.between(923, 1221)

df_estacao_semana = (
    df.withColumn("YEAR", date_format("value", "yyyy").cast("int"))
    .withColumn("MO", date_format("value", "MM").cast("int"))
    .withColumn("DY", date_format("value", "dd").cast("int"))
    # Kept as an informational column; it no longer drives the season logic.
    .withColumn("DAY_OF_YEAR", dayofyear("value"))
    .withColumn(
        "SEASON_CODE",
        when(is_outono, 2)
        .when(is_inverno, 3)
        .when(is_primavera, 4)
        .otherwise(1),
    )
    .withColumn(
        "SEASON",
        when(is_outono, "Outono")
        .when(is_inverno, "Inverno")
        .when(is_primavera, "Primavera")
        .otherwise("Verão"),
    )
    # Spark's dayofweek is 1-based starting on Sunday (1 = Sunday ... 7 = Saturday).
    .withColumn("WEEK_CODE", dayofweek("value"))
    .withColumn(
        "WEEK_DAY",
        when(col("WEEK_CODE") == 1, "Domingo")
        .when(col("WEEK_CODE") == 2, "Segunda-feira")
        .when(col("WEEK_CODE") == 3, "Terça-feira")
        .when(col("WEEK_CODE") == 4, "Quarta-feira")
        .when(col("WEEK_CODE") == 5, "Quinta-feira")
        .when(col("WEEK_CODE") == 6, "Sexta-feira")
        .when(col("WEEK_CODE") == 7, "Sábado"),
    )
    .withColumnRenamed("value", "data_dia")
)

In [0]:
# Persist the dimension as a Delta table at the given mount path, replacing any
# previous contents, and register it as silver.dim_data_estacao_semana.
(
    df_estacao_semana.write
    .mode("overwrite")
    .option("path", "/mnt/projeto_climatico/silver/dim_data_estacao_semana")
    .format("delta")
    .saveAsTable("silver.dim_data_estacao_semana")
)

In [0]:
%sql
-- Run Delta OPTIMIZE on the date dimension table written by the previous cell.
OPTIMIZE silver.dim_data_estacao_semana

path,metrics
dbfs:/mnt/projeto_climatico/silver/dim_data_estacao_semana,"List(1, 8, List(121034, 121034, 121034.0, 1, 121034), List(19418, 24332, 20509.5, 8, 164076), 0, null, 1, 8, 0, true, 0, 0, 1685936119387, 1685936150585, 8, 1, null, List(0, 0), 9, 9, 19801, 0)"
