In [0]:
# =========================
# gold_hybrid (GOLD)
# Source: catalog_project.silver.silver_hybrid_manufacturing
# Target: catalog_project.gold.gold_hybrid_manufacturing
# =========================

from pyspark.sql import functions as F

#WIDGETS

In [0]:
# Declare the notebook parameters as text widgets, each with its default value.
# NOTE: the widgets are deliberately not cleared with dbutils.widgets.removeAll()
# first. Databricks documents that a widget cannot be created in the same cell
# that issues a remove command, and clearing on every run would also reset any
# value chosen in the widget UI back to the default below.
dbutils.widgets.text("catalog", "catalog_project")
dbutils.widgets.text("silver_schema", "silver")
dbutils.widgets.text("gold_schema", "gold")


In [0]:

# Read the current value of each parameter widget, in declaration order, into
# the variables used by the rest of the notebook.
catalog, silver_schema, gold_schema = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("catalog", "silver_schema", "gold_schema")
)


#CONTEXT

In [0]:
# Switch the session to the configured catalog, then make sure the gold schema
# exists (the schema name is unqualified, so it relies on the USE CATALOG above).
for statement in (
    f"USE CATALOG {catalog}",
    f"CREATE SCHEMA IF NOT EXISTS {gold_schema}",
):
    spark.sql(statement)

# Three-level (catalog.schema.table) names of the input and output tables.
source_table = f"{catalog}.{silver_schema}.silver_hybrid_manufacturing"
target_table = f"{catalog}.{gold_schema}.gold_hybrid_manufacturing"

#LOAD SILVER

In [0]:
# Load the silver-layer source table as a DataFrame.
df = spark.read.table(source_table)


#GOLD AGGREGATIONS

In [0]:
# Grouping keys: the gold table has one row per combination of these columns.
group_keys = ["Machine_ID", "Operation_Type", "Material_Used"]

# Per-group metrics, listed in output-column order.
gold_metrics = [
    # Number of source rows in the group.
    F.count("*").alias("total_jobs"),
    F.avg("total_delay_min").alias("avg_delay_min"),
    # Mean of is_delayed after casting to int.
    # NOTE(review): if is_delayed is a boolean / 0-1 flag, this is a fraction
    # in [0, 1] rather than a 0-100 percentage, despite the "pct" name —
    # confirm what downstream consumers expect.
    F.avg(F.col("is_delayed").cast("int")).alias("pct_jobs_delayed"),
    F.avg("efficiency_ratio").alias("avg_efficiency_ratio"),
    F.avg("energy_per_min").alias("avg_energy_per_min"),
]

df_gold = df.groupBy(*group_keys).agg(*gold_metrics)

#SAVE GOLD

In [0]:
# Replace the target Delta table with the freshly computed aggregates.
# overwriteSchema allows the stored table schema to be replaced along with the
# data when the output columns differ from the existing table.
gold_writer = (
    df_gold.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
gold_writer.saveAsTable(target_table)

#VALIDATION

In [0]:
# Report which table was written, then preview up to 10 rows read back from it.
print(f"OK: {target_table}")
gold_preview = spark.table(target_table).limit(10)
display(gold_preview)