In [None]:
# Auto-install openpyxl if it's missing.
#
# Flow: try to import openpyxl; if that raises ImportError, install it with the
# %pip magic, import it, and then call dbutils.library.restartPython().
#
# NOTE(review): presumably openpyxl is needed as the engine behind the
# pd.read_excel call on the .xlsx file in the next cell — confirm.
# NOTE(review): no version is pinned on the install line, so the installed
# version can differ between runs.
try:
    import openpyxl
except ImportError:
    # %pip is a notebook magic, not Python syntax; this cell only runs inside
    # a notebook kernel that understands it.
    %pip install openpyxl
    import openpyxl
    # NOTE(review): presumably this restarts the Python process so the fresh
    # install is picked up by later cells, which would also discard the import
    # done on the line above — confirm against the Databricks documentation.
    dbutils.library.restartPython()

In [None]:
import pandas as pd 
from pyspark.sql.types import StructType, StructField, DateType, StringType, DoubleType, IntegerType
import pyspark.sql.functions as F
from pyspark.sql import Window

# Step 1: Import Daily AOP targets from Excel

# Catalog and schema used for every table this cell reads or writes. Building
# fully qualified names from these keeps the reads and writes consistent and
# independent of whichever catalog/schema the session currently has selected.
CATALOG = "analytics"
SCHEMA = "gold"

file_path = "/Workspace/Users/financial-analytics-hub/AOP_gaited.xlsx"
sheet_name = "DailyAOP"
aop_df = pd.read_excel(file_path, sheet_name=sheet_name)

# Step 2: Standardize data structure and convert to Spark DataFrame

# Target Spark schema: one row per Date/Category with a numeric Target.
# NOTE(review): assumes the sheet holds exactly these three columns in this
# order — confirm against the workbook.
schema = StructType([
    StructField("Date", DateType(), True),
    StructField("Category", StringType(), True),
    StructField("Target", DoubleType(), True)
])
aop_spark_df = spark.createDataFrame(aop_df, schema=schema)

# Step 3: Save AOP data as a Delta table for persistent storage

# The USE statements are kept for their session-level side effect (later cells
# may rely on the current catalog/schema — verify); the writes below no longer
# depend on them because the table names are fully qualified.
spark.sql(f"USE CATALOG {CATALOG}")
spark.sql(f"USE SCHEMA {SCHEMA}")
(
    aop_spark_df.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(f"{CATALOG}.{SCHEMA}.aop_q1")
)

# Step 4: Load date dimension table

date_df = spark.table(f"{CATALOG}.{SCHEMA}.date")

# Step 5: Enrich AOP data with fiscal calendar information

# LEFT join: every AOP row is kept even when its Date has no match in the
# date dimension; such rows get NULL FiscalYear / FiscalQuarter.
aop_with_fiscal = (
    aop_spark_df
    .join(date_df.select("Date", "FiscalYear", "FiscalQuarter"), on="Date", how="left")
)

# Rows with a NULL fiscal period would all fall into a single window partition
# in Step 6 and receive a meaningless cumulative total. Surface them instead of
# letting them pass silently; this is a warning only, the run continues.
unmatched_dates = (
    aop_with_fiscal
    .filter(F.col("FiscalYear").isNull() | F.col("FiscalQuarter").isNull())
    .select("Date")
    .distinct()
    .limit(5)
    .collect()
)
if unmatched_dates:
    print(
        "WARNING: AOP rows without a fiscal period in the date dimension "
        "(showing up to 5 dates):",
        [row["Date"] for row in unmatched_dates],
    )

# Step 6: Calculate Quarter-to-Date (QTD) cumulative targets

# Running total per fiscal quarter and Category, ordered by Date.
# A RANGE frame treats rows sharing the same Date as peers, so they all get the
# same "through this date" total. With one row per Date/Category this equals a
# ROWS frame; with duplicate dates the result no longer depends on the
# arbitrary order of the tied rows.
window_spec = (
    Window.partitionBy("FiscalYear", "FiscalQuarter", "Category")
    .orderBy("Date")
    .rangeBetween(Window.unboundedPreceding, Window.currentRow)
)

aop_q1_with_qtd = aop_with_fiscal.withColumn("QTD_Target", F.sum("Target").over(window_spec))

# Step 7: Save the enriched and cumulative AOP data

(
    aop_q1_with_qtd.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(f"{CATALOG}.{SCHEMA}.aop_q1_with_qtd")
)
