#IMPORTS

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.types import *

#WIDGETS

In [0]:
# Remove every existing widget before (re)creating the ones this notebook uses.
# NOTE(review): Databricks documentation advises creating widgets in a different
# cell from the one that removes them — confirm this cell behaves as intended.
dbutils.widgets.removeAll()

# Text widgets exposed by this notebook: widget name -> default value.
widget_defaults = {
    "storageName": "storageaccountcf9603",
    "container": "raw",
    "catalog": "catalog_project",
    "schema": "bronze",
}
for widget_name, default_value in widget_defaults.items():
    dbutils.widgets.text(widget_name, default_value)

#CONSTANTS

In [0]:
# Read the current widget values into plain Python variables, in widget order:
# storage account name, source container, catalog and schema.
storage_name, container, catalog, schema = (
    dbutils.widgets.get(widget_key)
    for widget_key in ("storageName", "container", "catalog", "schema")
)

#CONTEXT

In [0]:
# Make the widget-selected catalog and schema the session defaults, so that
# unqualified table names resolve against them.
use_catalog_sql = f"USE CATALOG {catalog}"
use_schema_sql = f"USE SCHEMA {schema}"

spark.sql(use_catalog_sql)
spark.sql(use_schema_sql)

#PATHS

In [0]:
# abfss URI of the source container root, built from the widget values,
# followed by the full path of the CSV file to ingest.
path_base = "abfss://{}@{}.dfs.core.windows.net/".format(container, storage_name)
path_hybrid = path_base + "hybrid_manufacturing_categorical.csv"

In [0]:
# Explicit schema applied when reading the CSV: (column name, Spark type) pairs
# in file order. Every column is declared nullable.
_hybrid_columns = [
    ("Job_ID", StringType()),
    ("Machine_ID", StringType()),
    ("Operation_Type", StringType()),
    # NOTE(review): the original line carried a "FIX" marker with no explanation —
    # confirm StringType is the intended type for this column.
    ("Material_Used", StringType()),
    ("Processing_Time", IntegerType()),
    ("Energy_Consumption", DoubleType()),
    ("Machine_Availability", IntegerType()),
    ("Scheduled_Start", TimestampType()),
    ("Scheduled_End", TimestampType()),
    ("Actual_Start", TimestampType()),
    ("Actual_End", TimestampType()),
    ("Job_Status", StringType()),
    ("Optimization_Category", StringType()),
]

hybrid_schema = StructType(
    [StructField(col_name, col_type, True) for col_name, col_type in _hybrid_columns]
)


#Read Source

In [0]:
# CSV reader configured with a header row and the explicit schema
# (no schema inference).
hybrid_reader = (
    spark.read.format("csv")
    .option("header", "true")
    .schema(hybrid_schema)
)

# Source rows exactly as read from the file.
hybrid_source_rows = hybrid_reader.load(path_hybrid)

# Add audit columns: the ingestion timestamp and the name of the source file.
df_hybrid = hybrid_source_rows.withColumn(
    "_ingestion_ts", F.current_timestamp()
).withColumn(
    "_source_file", F.lit("hybrid_manufacturing_categorical.csv")
)

#SAVE (Delta table in UC)

In [0]:
# Fully qualified Unity Catalog table that receives the ingested rows.
target_table = f"{catalog}.{schema}.bronze_hybrid_manufacturing_categorical"

# Replace the table contents on every run; overwriteSchema lets the stored
# table schema be replaced by the DataFrame's schema when they differ.
(df_hybrid.write
 .format("delta")
 .mode("overwrite")
 .option("overwriteSchema", "true")
 .saveAsTable(target_table)
)

# Path-based Delta copy in the "bronze" container. The storage account is taken
# from the storageName widget rather than hardcoded, so changing the widget
# redirects both the read and this write; with the default widget value the
# path is the same as before.
target_path = f"abfss://bronze@{storage_name}.dfs.core.windows.net/bronze_hybrid_manufacturing"

# overwriteSchema mirrors the table write above, so a schema change does not
# let the first write succeed and then fail this one on a schema mismatch.
# NOTE(review): each write re-runs the df_hybrid plan, so values computed at
# query time may differ between the two copies — confirm this is acceptable.
(df_hybrid.write
  .format("delta")
  .mode("overwrite")   # or "append"
  .option("overwriteSchema", "true")
  .save(target_path)
)

#VALIDATION

In [0]:
# Report which table was written, then show a ten-row preview read back from it.
print("OK: {}".format(target_table))

preview_rows = spark.table(target_table).limit(10)
display(preview_rows)