In [0]:
%run /Workspace/Users/mrudular_2021@vemanait.edu.in/adf_assignmnet/src/bronze_to_silver/utils

In [0]:
from pyspark.sql.functions import col

# Step 1: Resolve storage locations and load the silver-layer Delta tables
silver_base_path = "abfss://silver@edwinadfassign.dfs.core.windows.net/sales-view"
gold_output_path = "abfss://gold@edwinadfassign.dfs.core.windows.net/sales_view/StoreProductSalesAnalysis"


def _read_silver_table(table_name):
    """Read one table stored under ``silver_base_path``.

    Delegates to ``read_delta_with_snake_case``, which is not defined in this
    notebook (presumably supplied by the ``%run`` of the utils notebook --
    TODO confirm).
    """
    return read_delta_with_snake_case(spark, f"{silver_base_path}/{table_name}")


# The sales table exposes the product key as "product__id" (double
# underscore); rename it to "product_id" before the join.
sales_df = _read_silver_table("customer_sales").withColumnRenamed(
    "product__id", "product_id"
)
product_df = _read_silver_table("product")
store_df = _read_silver_table("store")

# Step 2: Combine product and store data into a single lookup frame
store_product_df = get_store_product_data(product_df, store_df)

# Step 3: Attach the store/product attributes to each sales record
final_df = enrich_sales_with_store_product(sales_df, store_product_df)

# Step 4: Resolve duplicates (keep the first occurrence of every column name)
#
# Dropping by name removes EVERY column carrying that name, so calling
# drop("order_date") on a frame that holds two "order_date" columns would
# discard both. Instead, give each repeated occurrence a unique placeholder
# name (toDF renames positionally, so it is safe with ambiguous names) and
# drop only those placeholders.
_taken_names = set(final_df.columns)
_first_seen = set()
_positional_names = []
_placeholder_names = []
duplicate_cols = []  # names that occurred more than once (one entry per extra copy)

for _position, _name in enumerate(final_df.columns):
    if _name not in _first_seen:
        # First occurrence: keep the column under its own name.
        _first_seen.add(_name)
        _positional_names.append(_name)
        continue

    duplicate_cols.append(_name)
    _placeholder = f"{_name}__dup_{_position}"
    # Guard against the unlikely case that the placeholder already exists.
    while _placeholder in _taken_names:
        _placeholder += "_"
    _taken_names.add(_placeholder)
    _placeholder_names.append(_placeholder)
    _positional_names.append(_placeholder)

if duplicate_cols:
    print(f"Duplicate Columns Detected: {duplicate_cols}")
    # Rename by position, then remove only the later (right-side) copies.
    final_df = final_df.toDF(*_positional_names).drop(*_placeholder_names)

# Step 5: Select required columns
selected_cols = [
    "order_date", "category", "city", "customer_id", "order_id", "product_id", "profit", "region", "sales", "segment",
    "ship_date", "ship_mode", "latitude", "longitude",
    "store_name", "location", "manager_name", "product_name", "price", "stock_quantity", "image_url"
]

# Only select columns that exist in final_df. The loop variable is named
# `column_name` so it does not shadow the imported pyspark `col` function.
available_cols = set(final_df.columns)

# Report requested columns that are absent so schema drift in the upstream
# tables is visible instead of being silently skipped.
missing_cols = [column_name for column_name in selected_cols if column_name not in available_cols]
if missing_cols:
    print(f"Requested columns missing from final_df: {missing_cols}")

selected_cols = [column_name for column_name in selected_cols if column_name in available_cols]

final_df = final_df.select(*selected_cols)
final_df.display()

# Step 6: Persist the result to the gold layer as a Delta table.
# Existing data at the target path is overwritten, with the
# "overwriteSchema" option enabled on the writer.
gold_writer = (
    final_df.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
gold_writer.save(gold_output_path)

# Optional: (re)register a table that points at the gold location.
create_table_sql = f"""
    CREATE TABLE StoreProductSalesAnalysis
    USING DELTA
    LOCATION '{gold_output_path}'
"""
# Remove any previous registration first so CREATE TABLE cannot fail on an
# already-existing table name.
spark.sql("DROP TABLE IF EXISTS StoreProductSalesAnalysis")
spark.sql(create_table_sql)
