In [0]:
 %run ./00setupconfig


In [0]:
# Databricks notebook source
# MAGIC %md
# MAGIC # 01 - Bronze Layer: Autoloader (Simplified)
# MAGIC 
# MAGIC **Purpose:** Incrementally load CSV files from the source path with Auto Loader and append them to the bronze Delta table
# MAGIC 
# MAGIC **Author:** Data Engineering Team  
# MAGIC **Last Updated:** December 2024

# COMMAND ----------

# MAGIC %md
# MAGIC ## Load Configuration

# COMMAND ----------

# MAGIC %run ./00_setup_config

# COMMAND ----------

from pyspark.sql import functions as F

# CATALOG, BRONZE_SCHEMA, BRONZE_TABLE and SOURCE_DATA_PATH are not defined in
# this notebook; they are expected to already be in scope from the %run of the
# setup notebook. Echo them so the run output records what this run targets.
print("✅ Configuration loaded")
for label, value in (
    ("Catalog", CATALOG),
    ("Bronze Schema", BRONZE_SCHEMA),
    ("Source", SOURCE_DATA_PATH),
    # Only the catalog part of the three-part name is backtick-quoted.
    ("Target", f"`{CATALOG}`.{BRONZE_SCHEMA}.{BRONZE_TABLE}"),
):
    print(f"   {label}: {value}")

# COMMAND ----------

# MAGIC %md
# MAGIC ## Create Bronze Table with Autoloader

# COMMAND ----------

# Ingest CSV files from SOURCE_DATA_PATH into the bronze Delta table with
# Auto Loader ("cloudFiles"), then block until the run has finished.
#
# Names read but not defined in this cell: spark, F, CHECKPOINT_LOCATION,
# SOURCE_DATA_PATH, CATALOG, BRONZE_SCHEMA, BRONZE_TABLE.
# Names defined by this cell: bronze_checkpoint, bronze_target_table,
# bronze_stream, query.

# Checkpoint directory for the bronze write stream.
# NOTE(review): built by plain string concatenation, so CHECKPOINT_LOCATION
# presumably ends with "/" — confirm in the setup notebook. The resulting path
# is intentionally kept exactly as before so an existing checkpoint is reused.
bronze_checkpoint = f"{CHECKPOINT_LOCATION}bronze_stream/"

# Three-part target name; only the catalog part is backtick-quoted.
bronze_target_table = f"`{CATALOG}`.{BRONZE_SCHEMA}.{BRONZE_TABLE}"

# Streaming read of the source CSV files.
bronze_stream = (spark.readStream
    .format("cloudFiles")
    .option("cloudFiles.format", "csv")
    # Inferred schema is stored next to the checkpoint (same base path).
    .option("cloudFiles.schemaLocation", f"{CHECKPOINT_LOCATION}schema")
    .option("cloudFiles.inferColumnTypes", "true")
    # NOTE(review): per the Auto Loader docs, "addNewColumns" stops the stream
    # when a new column shows up and expects a re-run — confirm the job retries.
    .option("cloudFiles.schemaEvolutionMode", "addNewColumns")
    .option("header", "true")
    .load(SOURCE_DATA_PATH)
    # Audit columns added to every ingested row.
    .withColumn("ingestion_timestamp", F.current_timestamp())
    # Use the file metadata column instead of F.input_file_name(), which is
    # deprecated on Databricks and not supported on Unity Catalog
    # standard (shared) access mode compute.
    .withColumn("source_file", F.col("_metadata.file_path"))
)

# Append to the Delta table; availableNow processes what is currently
# available and then stops, so this cell behaves like a batch job.
query = (bronze_stream.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", bronze_checkpoint)
    # Let columns added by schema evolution be merged into the target table.
    .option("mergeSchema", "true")
    .trigger(availableNow=True)
    # toTable() is the documented DataStreamWriter API for starting a stream
    # into a table (DataStreamWriter.table() is not part of PySpark).
    .toTable(bronze_target_table)
)

# Block until the availableNow run has finished.
query.awaitTermination()

print("✅ Bronze data processed")
print(f"   Table: {bronze_target_table}")

# COMMAND ----------

# MAGIC %md
# MAGIC ## Verify Bronze Table

# COMMAND ----------

# Sanity check on the bronze table: resolve it by its three-part name, print
# the total row count, and render up to ten rows.
bronze_table_name = f"`{CATALOG}`.{BRONZE_SCHEMA}.{BRONZE_TABLE}"
bronze_df = spark.table(bronze_table_name)

print("✅ Bronze Table: " + bronze_table_name)
# The count is computed after the header line is printed, as before.
record_count = bronze_df.count()
print(f"   Total Records: {record_count:,}")

bronze_preview = bronze_df.limit(10)
display(bronze_preview)

# COMMAND ----------

# MAGIC %md
# MAGIC ## Next Step
# MAGIC 
# MAGIC Bronze layer complete. Run the next notebook: `02_silver_star_schema.py`

old code