In [0]:
%sql
-- Create the silver_layer schema inside the `breweries-catalog` catalog.
-- IF NOT EXISTS makes this statement safe to re-run: it does nothing when the schema is already there.
CREATE SCHEMA IF NOT EXISTS `breweries-catalog`.silver_layer;

In [0]:
from pyspark.sql import functions as F

# Load the bronze-layer breweries table; it is the input for the silver transformation.
# The catalog name contains a hyphen, so it has to be wrapped in backticks.
bronze_table_name = "`breweries-catalog`.bronze_layer.breweries_bronze"
df_bronze = spark.read.table(bronze_table_name)

In [0]:
# Build the silver DataFrame from the bronze data:
#   1. keep only the fields used downstream (id is cast to string)
#   2. trim and lower-case every text field
#   3. stamp each row with the current date as processing_date
#   4. keep only rows whose country is not null
#   5. keep a single row per id
text_fields = ["name", "brewery_type", "city", "state", "country"]

# Trimmed, lower-cased version of each text field, aliased back to its original column name
normalized_text = [F.lower(F.trim(F.col(field))).alias(field) for field in text_fields]

# Projection: id first, then the normalized text fields in their original order
df_projected = df_bronze.select(F.col("id").cast("string"), *normalized_text)

# Add current date as processing_date
df_stamped = df_projected.withColumn("processing_date", F.current_date())

# Only the country column is checked here; rows with a null country are discarded
df_with_country = df_stamped.filter(F.col("country").isNotNull())

# Drop any duplicates (one row is kept per id)
df_silver = df_with_country.dropDuplicates(["id"])

In [0]:
# Data-quality (DQ) gate for the silver DataFrame.
#
# Steps:
#   1. Compute the total row count and the per-column null counts in ONE aggregation
#      (df_silver is not cached, so every extra action would re-run the whole
#      bronze -> silver transformation).
#   2. Fail immediately if the DataFrame is empty.
#   3. Append the null counts to the DQ log table.
#   4. Fail if any column contains nulls.
#
# The log row is written BEFORE the null-check failure is raised so that failing runs
# are recorded as well; otherwise the log could only ever contain all-zero rows.
#
# Raises:
#   Exception: when df_silver is empty, or when any column contains null values.

# Single pass over df_silver: total rows plus one null counter per column.
# count() ignores nulls, and when() without otherwise() yields null for non-matching rows,
# so each counter equals the number of rows where that column is null.
dq_stats = df_silver.agg(
    F.count(F.lit(1)).alias("_total_rows"),
    *[F.count(F.when(F.col(c).isNull(), c)).alias(c) for c in df_silver.columns],
)
dq_row = dq_stats.first()  # a global aggregation always returns exactly one row

if dq_row["_total_rows"] == 0:
    raise Exception("Data Quality Check Failed: Silver Table is empty")

# Columns that contain at least one null value
null_columns = [c for c in df_silver.columns if dq_row[c] > 0]

# DQ Log: one row holding the null count of every silver column, tagged with layer and timestamp
null_counts = dq_stats.drop("_total_rows")
df_silver_dq = (
    null_counts
    .withColumn("dq_layer", F.lit("silver"))
    .withColumn("ingestion_timestamp", F.current_timestamp())
)

# Save the DQ information to the DQ table
df_silver_dq.write.mode("append").format("delta").saveAsTable("`breweries-catalog`.silver_layer.breweries_dq")

# Stop the pipeline only after the failing counts have been logged
if null_columns:
    raise Exception(f"Data Quality Check Failed: Null values found in columns: {null_columns}")

In [0]:
# Persist the silver DataFrame as a Delta table.
# Rows are appended to the existing table, files are partitioned by country,
# and mergeSchema lets new columns be added to the table schema on write.
silver_table_name = "`breweries-catalog`.silver_layer.breweries_silver"

(
    df_silver.write
    .format("delta")
    .mode("append")
    .partitionBy("country")
    .option("mergeSchema", "true")
    .saveAsTable(silver_table_name)
)