In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, sum, coalesce
from pyspark.sql.types import StructType, StructField, StringType, DoubleType
from pyspark.sql.utils import AnalysisException

# Build (or reuse) the Spark session for this job. Hive support is enabled
# and the Delta schema auto-merge setting is switched on.
spark = (
    SparkSession.builder
    .appName("IncrementalLoadGSynergy")
    .config("spark.databricks.delta.schema.autoMerge.enabled", "true")
    .enableHiveSupport()
    .getOrCreate()
)

# Schema of the incoming fact rows: five nullable string key columns
# followed by three nullable double measure columns.
schema = StructType(
    [
        StructField(key_name, StringType(), True)
        for key_name in (
            "pos_site_id",
            "sku_id",
            "fsclwk_id",
            "price_substate_id",
            "type",
        )
    ]
    + [
        StructField(measure_name, DoubleType(), True)
        for measure_name in (
            "sales_units",
            "sales_dollars",
            "discount_dollars",
        )
    ]
)

# Hard-coded sample rows standing in for an incremental batch.
# Tuple order follows the fields of `schema`.
new_data = [
    ("1001", "A123", "202510", "P01", "Online", 10.0, 200.0, 15.0),
    ("1002", "B456", "202510", "P02", "Store", 5.0, 100.0, 8.0),
]

# Turn the sample rows into a DataFrame and display it.
new_fact_df = spark.createDataFrame(data=new_data, schema=schema)

print("\n Incoming Incremental Data (new_fact_df):")
new_fact_df.show()

# Load Existing Data from Delta Table
try:
    mview_weekly_sales_df = spark.read.format("delta").table("gsynergy_db.mview_weekly_sales")
except AnalysisException:
    # Spark raises AnalysisException when the table cannot be resolved,
    # which is presumably the first-run case (table not created yet).
    # Start from an empty frame that carries the materialized view's own
    # columns (total_*), not the fact-table columns, because the merge step
    # below reads total_sales_units / total_sales_dollars /
    # total_discount_dollars from this frame.
    mview_weekly_sales_df = spark.createDataFrame(
        [],
        StructType(
            [
                StructField(key_name, StringType(), True)
                for key_name in (
                    "pos_site_id",
                    "sku_id",
                    "fsclwk_id",
                    "price_substate_id",
                    "type",
                )
            ]
            + [
                StructField(total_name, DoubleType(), True)
                for total_name in (
                    "total_sales_units",
                    "total_sales_dollars",
                    "total_discount_dollars",
                )
            ]
        ),
    )

print("\n Existing Data in Delta Table (mview_weekly_sales_df):")
mview_weekly_sales_df.show()

# Roll the incoming rows up to one row per key combination. Each measure is
# summed and exposed under a "new_"-prefixed column name.
updated_mview_df = (
    new_fact_df
    .groupBy("pos_site_id", "sku_id", "fsclwk_id", "price_substate_id", "type")
    .agg(
        *[
            sum(measure).alias(f"new_{measure}")
            for measure in ("sales_units", "sales_dollars", "discount_dollars")
        ]
    )
)

print("\n Aggregated Incremental Data (updated_mview_df):")
updated_mview_df.show()

# Merge New Data with Existing Data
# A full outer join on the key columns keeps rows that exist only in the
# stored view, only in the incoming batch, or in both. Because the join is
# expressed as a list of column names, Spark emits each key column once
# (already merged across both sides), so the keys are selected directly.
#
# For each measure, the new total is old + new when both sides have a value.
# When only one side has a value, the sum is NULL and coalesce falls back to
# whichever side is present. The previous expression returned the old total
# whenever it existed, so incoming values were never added to existing rows.
final_mview_df = mview_weekly_sales_df.alias("old").join(
    updated_mview_df.alias("new"),
    ["pos_site_id", "sku_id", "fsclwk_id", "price_substate_id", "type"],
    "outer"
).select(
    col("pos_site_id"),
    col("sku_id"),
    col("fsclwk_id"),
    col("price_substate_id"),
    col("type"),
    coalesce(
        col("old.total_sales_units") + col("new.new_sales_units"),
        col("old.total_sales_units"),
        col("new.new_sales_units"),
    ).alias("total_sales_units"),
    coalesce(
        col("old.total_sales_dollars") + col("new.new_sales_dollars"),
        col("old.total_sales_dollars"),
        col("new.new_sales_dollars"),
    ).alias("total_sales_dollars"),
    coalesce(
        col("old.total_discount_dollars") + col("new.new_discount_dollars"),
        col("old.total_discount_dollars"),
        col("new.new_discount_dollars"),
    ).alias("total_discount_dollars"),
)

print("\n Final DataFrame After Incremental Load (final_mview_df):")
final_mview_df.show()

# Persist the merged result: overwrite the Delta table
# gsynergy_db.mview_weekly_sales with the mergeSchema option turned on.
(
    final_mview_df.write
    .format("delta")
    .mode("overwrite")
    .option("mergeSchema", "true")
    .saveAsTable("gsynergy_db.mview_weekly_sales")
)

print("\n Incremental Data Added Successfully!")



 Incoming Incremental Data (new_fact_df):
+-----------+------+---------+-----------------+------+-----------+-------------+----------------+
|pos_site_id|sku_id|fsclwk_id|price_substate_id|  type|sales_units|sales_dollars|discount_dollars|
+-----------+------+---------+-----------------+------+-----------+-------------+----------------+
|       1001|  A123|   202510|              P01|Online|       10.0|        200.0|            15.0|
|       1002|  B456|   202510|              P02| Store|        5.0|        100.0|             8.0|
+-----------+------+---------+-----------------+------+-----------+-------------+----------------+


 Existing Data in Delta Table (mview_weekly_sales_df):
+-----------+----------+---------+-----------------+-----------------+-------------------+----------------------+------+
|pos_site_id|    sku_id|fsclwk_id|price_substate_id|total_sales_units|total_sales_dollars|total_discount_dollars|  type|
+-----------+----------+---------+-----------------+------------