#IMPORTS

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.types import *

#WIDGETS

In [0]:
# Reset the notebook's widgets, then recreate the four text widgets with
# their default values.
# NOTE(review): Databricks' widget documentation advises against creating a
# widget in the same cell that removes one — consider moving removeAll() into
# its own cell; confirm against the runtime in use.
dbutils.widgets.removeAll()

# Widget name -> default value, created in this order.
widget_defaults = {
    "storageName": "storageaccountcf9603",
    "container": "raw",
    "catalog": "catalog_project",
    "schema": "bronze",
}
for widget_name, default_value in widget_defaults.items():
    dbutils.widgets.text(widget_name, default_value)

#CONSTANTS

In [0]:
# Read the current value of each widget, in the order the variables are
# listed on the left-hand side.
storage_name, container, catalog, schema = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("storageName", "container", "catalog", "schema")
)

#CONTEXT

In [0]:
# Make the widget-selected catalog and schema the session defaults.
use_catalog_sql = f"USE CATALOG {catalog}"
use_schema_sql = f"USE SCHEMA {schema}"

spark.sql(use_catalog_sql)
spark.sql(use_schema_sql)

#PATHS

In [0]:
# Build the abfss:// root of the container, then the full path of the
# defects CSV that sits directly under that root.
adls_host = f"{storage_name}.dfs.core.windows.net"
path_base = f"abfss://{container}@{adls_host}/"
path_defects = path_base + "manufacturing_defect_dataset.csv"

#SCHEMA

In [0]:
# Explicit schema for the defects CSV: (column name, Spark type) pairs in
# file order. Every column is declared nullable.
_defect_columns = [
    ("ProductionVolume", IntegerType()),
    ("ProductionCost", DoubleType()),
    ("SupplierQuality", DoubleType()),
    ("DeliveryDelay", IntegerType()),
    ("DefectRate", DoubleType()),
    ("QualityScore", DoubleType()),
    ("MaintenanceHours", IntegerType()),
    ("DowntimePercentage", DoubleType()),
    ("InventoryTurnover", DoubleType()),
    ("StockoutRate", DoubleType()),
    ("WorkerProductivity", DoubleType()),
    ("SafetyIncidents", IntegerType()),
    ("EnergyConsumption", DoubleType()),
    ("EnergyEfficiency", DoubleType()),
    ("AdditiveProcessTime", DoubleType()),
    ("AdditiveMaterialCost", DoubleType()),
    ("DefectStatus", IntegerType()),
]

defects_schema = StructType(
    [
        StructField(column_name, column_type, True)
        for column_name, column_type in _defect_columns
    ]
)

#READ SOURCE

In [0]:
# Derive the lineage value from the path that is actually read, so the
# `_source_file` column cannot drift from `path_defects` if the file name
# is ever changed in one place only.
source_file_name = path_defects.rsplit("/", 1)[-1]

# Load the defects CSV with the explicit schema and tag every row with the
# ingestion timestamp and the name of the source file.
# NOTE(review): no parse `mode` is set, so the reader's default handling of
# malformed values applies — confirm that is acceptable for this bronze load.
df_defects = (
    spark.read.format("csv")
    .option("header", "true")
    .schema(defects_schema)
    .load(path_defects)
    .withColumn("_ingestion_ts", F.current_timestamp())
    .withColumn("_source_file", F.lit(source_file_name))
)

#SAVE (DELTA TABLE IN UC)

In [0]:
# Three-part name (catalog.schema.table) of the bronze target table.
target_table = f"{catalog}.{schema}.bronze_manufacturing_defects"

# Configure a Delta writer that replaces the table on every run;
# overwriteSchema is enabled so the stored schema is replaced as well.
delta_writer = (
    df_defects.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
delta_writer.saveAsTable(target_table)

#VALIDATION

In [0]:
# Confirm the write: show the first ten rows of the saved table, then print
# the schema as it was stored.
print(f"OK: {target_table}")
preview_df = spark.table(target_table).limit(10)
display(preview_df)

print("Schema guardado:")
persisted_df = spark.table(target_table)
persisted_df.printSchema()

In [0]:
# Print the schema of the table written by this notebook. Uses target_table
# (built from the catalog/schema widgets) rather than a hard-coded name, so
# the check always inspects the table that was actually written.
spark.table(target_table).printSchema()
