In [0]:
from pyspark.sql.functions import col
from pyspark.sql.types import IntegerType, DoubleType, BooleanType, DateType

In [0]:
"""
Purpose: Load raw supply chain CSV data into Bronze Delta table.
Layer: Bronze (Raw)

Steps:
    1. Read the inventory CSV from the Databricks FileStore, treating the
       first row as the header and inferring column types.
    2. Preview the first rows.
    3. Overwrite the Delta data stored at the Bronze path.
    4. Make sure the database and the table pointing at the Bronze path
       exist.
"""

from pyspark.sql import SparkSession

# getOrCreate() hands back the already-running session when there is one
# (Databricks predefines `spark`) and only builds a new session otherwise.
spark = SparkSession.builder.appName("BronzeIngestion").getOrCreate()

# File path uploaded to Databricks FileStore
file_path = "/FileStore/tables/inventory_data.csv"

# Read CSV into Spark DataFrame: first row is the header, column types are
# inferred from the data.
df_bronze = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv(file_path)
)

# Preview data (first 5 rows, without truncating long values)
df_bronze.show(5, truncate=False)

# Write to Bronze Delta table; "overwrite" replaces what is stored at the path
bronze_path = "/mnt/bronze/supply_chain_inventory"
df_bronze.write.format("delta").mode("overwrite").save(bronze_path)

# Register as SQL table backed by the Delta files written above.
# IF NOT EXISTS keeps both statements safe to re-run.
spark.sql("CREATE DATABASE IF NOT EXISTS supply_chain_db")
spark.sql(f"""
    CREATE TABLE IF NOT EXISTS supply_chain_db.bronze_inventory
    USING DELTA
    LOCATION '{bronze_path}'
""")

print("Bronze Layer Ingestion Complete ✅")
