In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

spark = SparkSession.builder.appName("gold_layer_conform_zone").getOrCreate()

# ---------------------------------------------------------------------------
# Gold layer build.
#
# Reads the Silver sales, inventory and clinical-trial Delta tables, attaches
# a product_code to each sales row via a lookup derived from Bronze sales,
# joins the three sources together, prints a 10-row sample and overwrites the
# Gold Delta table.
# ---------------------------------------------------------------------------

# Input Delta locations (Silver tables plus the Bronze sales table).
sales_silver_path = '/Volumes/workspace/hls_demo_2/refined_data/sales_data_silver_delta'
inventory_silver_path = '/Volumes/workspace/hls_demo_2/refined_data/inventory_data_silver_delta'
clinical_trial_silver_path = '/Volumes/workspace/hls_demo_2/refined_data/clinical_trial_data_silver_delta'
bronze_sales_path = '/Volumes/workspace/hls_demo_2/raw_data/sales_data_bronze_delta'  # For product_code mapping

# Output Delta location for the Gold table.
gold_path_delta = '/Volumes/workspace/hls_demo_2/conform_data/gold_layer_delta'

# Columns kept after the sales/inventory join. Declared once so that the two
# projections below cannot drift apart when a column is added or removed.
sales_inventory_columns = [
    'product_code',
    'product_name',
    'region',
    'year',
    'month',
    'total_sales_qty',
    'total_sales_amount',
    'location',
    'total_stock_qty',
    'total_reorder_point',
    'stock_to_reorder_ratio',
]

# Metric columns contributed by the clinical-trial table.
clinical_metric_columns = [
    'avg_success_rate',
    'avg_trial_duration_days',
]

# Load the Silver layer tables.
sales_silver_df = spark.read.format('delta').load(sales_silver_path)
inventory_silver_df = spark.read.format('delta').load(inventory_silver_path)
clinical_trial_silver_df = spark.read.format('delta').load(clinical_trial_silver_path)

# Build the product_name -> product_code lookup from Bronze sales.
# NOTE(review): distinct() de-duplicates (product_code, product_name) pairs
# only. If one product_name appears with several product_code values in
# Bronze, the join below emits one sales row per code -- confirm the mapping
# is one-to-one.
bronze_sales_df = spark.read.format('delta').load(bronze_sales_path)
product_lookup_df = bronze_sales_df.select('product_code', 'product_name').distinct()

# Attach product_code to every sales row; unmatched rows get a null code.
sales_with_code_df = sales_silver_df.join(
    product_lookup_df,
    on='product_name',
    how='left',
)

# Combine sales with inventory on product_code. The inner join discards sales
# rows whose product_code is null or has no inventory row.
gold_sales_inventory_df = sales_with_code_df.join(
    inventory_silver_df,
    on='product_code',
    how='inner',
).select(*sales_inventory_columns)

# Project the clinical-trial table down to the join key and its two metrics
# before joining, so that any other clinical column sharing a name with the
# sales/inventory side cannot make the select below ambiguous.
clinical_metrics_df = clinical_trial_silver_df.withColumnRenamed(
    'drug_name', 'product_name'
).select('product_name', *clinical_metric_columns)

# Left join keeps every sales/inventory row; products with no clinical-trial
# match get null metrics.
gold_combined_df = gold_sales_inventory_df.join(
    clinical_metrics_df,
    on='product_name',
    how='left',
).select(*sales_inventory_columns, *clinical_metric_columns)

# Drop product_code before writing to match existing Delta table schema
gold_combined_df_final = gold_combined_df.drop('product_code')

# Show sample output
gold_combined_df_final.show(10, truncate=False)

# Save Gold layer Delta table (replaces the existing table contents).
gold_combined_df_final.write.format('delta').mode('overwrite').save(gold_path_delta)
