# Gold Layer Data Processing

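This notebook lists common commands for preparing consumable data in the Gold layer. The cells expect an active `spark` session and the Silver-layer DataFrames they reference (`silver_df`, `products_df`, `facts_df`, `customers_df`, `transactions_df`) to be defined before they are run.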

## 1. Aggregation and Summarization

In [None]:
from pyspark.sql import functions as F

# Grouping keys: calendar year and month extracted from `order_date`.
order_year = F.year('order_date').alias('year')
order_month = F.month('order_date').alias('month')

# Measures computed per (year, month) group: the sum and average of `amount`,
# plus the number of rows in the group.
monthly_measures = [
    F.sum('amount').alias('total_sales'),
    F.avg('amount').alias('avg_amount'),
    F.count('*').alias('order_count'),
]

monthly_metrics = silver_df.groupBy(order_year, order_month).agg(*monthly_measures)

# Replace whatever is currently stored at the Gold path with this aggregate.
(
    monthly_metrics.write
    .format('delta')
    .mode('overwrite')
    .save('/mnt/gold/monthly_metrics')
)

## 2. Modeling for Specific Use Cases

In [None]:
# Example star-schema dimension: save the products DataFrame as the Delta
# table `bi.dim_products`, replacing any existing contents.
products_df.write.saveAsTable('bi.dim_products', format='delta', mode='overwrite')

# Fact table intended for dashboards: saved the same way as `bi.fact_sales`.
facts_df.write.saveAsTable('bi.fact_sales', format='delta', mode='overwrite')

## 3. Consolidated Data Joins

In [None]:
# Combine customers with their transactions. This is an inner join, so only
# rows whose `customer_id` appears in both DataFrames are kept.
customer_txn_df = customers_df.join(transactions_df, on='customer_id', how='inner')

# Overwrite the Gold Delta path with the joined result.
customer_txn_df.write.save(
    '/mnt/gold/customer_transactions',
    format='delta',
    mode='overwrite',
)

## 4. Optimization for Visualization Tools

In [None]:
# Run Delta OPTIMIZE on the Gold path, Z-ordering by `customer_id`, so BI
# queries against this table run faster.
optimize_stmt = "OPTIMIZE delta.`/mnt/gold/customer_transactions` ZORDER BY (customer_id)"
spark.sql(optimize_stmt)

# Optional: bring at most 10,000 rows to the driver as a pandas DataFrame for
# quick plotting. The row cap keeps the conversion bounded.
sample_rows = customer_txn_df.limit(10000)
sample_pd = sample_rows.toPandas()

## 5. Final Quality Checks

In [None]:
# Check 1: the joined dataset must contain at least one row.
record_count = customer_txn_df.count()
assert record_count > 0, 'No records available'

# Check 2: no row may have a null `customer_id`.
# The count is captured in a variable so a failure reports how many rows are
# affected instead of raising a bare AssertionError.
# NOTE(review): an inner equi-join on `customer_id` should already drop null
# keys, so this mainly guards against a later change to the join — confirm.
null_customer_id_count = customer_txn_df.filter('customer_id IS NULL').count()
assert null_customer_id_count == 0, (
    f'{null_customer_id_count} of {record_count} records have a null customer_id'
)

## 6. Security and Governance

In [None]:
# Replace `email` with its SHA-256 hex digest so the raw address is not stored
# in the secured table.
# NOTE(review): the hash is unsalted, so identical emails map to identical
# digests — confirm this meets the masking requirement for restricted roles.
email_digest = F.sha2(F.col('email'), 256)
masked_df = customer_txn_df.withColumn('email', email_digest)

# Save the masked data as the Delta table `secure.customer_transactions`,
# replacing any existing contents. This cell issues no grants itself.
# NOTE(review): table ACLs are presumably configured on the table/schema
# elsewhere — confirm.
(
    masked_df.write
    .format('delta')
    .mode('overwrite')
    .saveAsTable('secure.customer_transactions')
)