# OpenMeteo Weather Data - Silver Layer - WRITE step

In [0]:
from pyspark.sql.functions import col, to_timestamp
from datetime import datetime, timedelta, UTC

In [0]:
# Register the "processing_date" notebook widget. Its default is yesterday's
# date in UTC, formatted as YYYY-MM-DD.
default_date = (datetime.now(UTC) - timedelta(days=1)).strftime('%Y-%m-%d')
dbutils.widgets.text("processing_date", default_date, "Date (YYYY-MM-DD)")

# Read the parameter. A blank or whitespace-only value is treated as
# "not provided" and falls back to the default instead of crashing in strptime.
string_date = dbutils.widgets.get("processing_date").strip() or default_date

# Parse strictly as YYYY-MM-DD; fail with a message that names the bad value.
try:
    processing_date = datetime.strptime(string_date, '%Y-%m-%d')
except ValueError as err:
    raise ValueError(
        f"processing_date must be in YYYY-MM-DD format, got {string_date!r}"
    ) from err

print(f"Starting processing for date: {processing_date}")

In [0]:
# Three-part name (catalog.schema.table) of the table this notebook reads from.
SOURCE_CATALOG = "workspace"
SOURCE_SCHEMA = "bronze"
SOURCE_TABLE = "openmeteo_hourly_historical"

In [0]:
# Name parts of the tables this notebook targets: one catalog, a staging
# schema and a final schema, and a table name shared by both.
DESTINATION_CATALOG = "workspace"
DESTINATION_SCHEMA_STAGING = "silver_staging"
DESTINATION_SCHEMA_FINAL = "silver"
DESTINATION_TABLE = "openmeteo_hourly_historical"

In [0]:
# Read the source table and keep only the rows whose observation_date
# equals the processing date.
df_bronze = (
    spark.read
    .table(f'{SOURCE_CATALOG}.{SOURCE_SCHEMA}.{SOURCE_TABLE}')
    .filter(col('observation_date') == processing_date)
)

In [0]:
# Add "temperature_2m_f": temperature_2m run through the x * 9/5 + 32
# (Celsius-to-Fahrenheit) formula.
fahrenheit_expr = col("temperature_2m") * 9 / 5 + 32
df_transformed = df_bronze.withColumn("temperature_2m_f", fahrenheit_expr)

In [0]:
# Replace the "time" column with the same values cast to a Spark timestamp.
df_transformed = df_transformed.withColumn("time", col("time").cast("timestamp"))

In [0]:
# only use for initial load
# df_transformed.write\
#     .format("delta")\
#     .mode("overwrite")\
#     .option("mergeSchema", "true")\
#     .partitionBy("observation_date")\
#     .saveAsTable(f"{DESTINATION_CATALOG}.{DESTINATION_SCHEMA_STAGING}.{DESTINATION_TABLE}")

In [0]:
# Expose the DataFrame to Spark SQL as the temp view "df_transformed".
df_transformed.createOrReplaceTempView(name="df_transformed")

In [0]:
# Render the partition value explicitly as YYYY-MM-DD. Interpolating the
# datetime directly would produce 'YYYY-MM-DD 00:00:00', which only matches the
# partition if Spark happens to coerce it back to a date. This format spec
# works whether processing_date is a datetime or a date.
partition_value = f"{processing_date:%Y-%m-%d}"

# Overwrite just this date's partition of the staging table with the rows of
# the "df_transformed" temp view. The partition column is excluded from the
# SELECT because its value is fixed by the PARTITION clause.
d = spark.sql(
    f'''
    INSERT OVERWRITE TABLE {DESTINATION_CATALOG}.{DESTINATION_SCHEMA_STAGING}.{DESTINATION_TABLE}
    PARTITION (observation_date = '{partition_value}')
    SELECT * except(observation_date) FROM df_transformed
    ''')

In [0]:
# Show the DataFrame returned by the INSERT OVERWRITE statement in the notebook output.
display(d)