In [0]:
# Autoreload + Setup 
%load_ext autoreload
%autoreload 2

import sys
from config.table_config import PROJECT_PATH

# Put PROJECT_PATH on sys.path; the membership guard keeps re-runs of this cell
# from appending duplicate entries. Note that `config.table_config` is imported
# above, before this tweak, so the `config` package must already be importable
# at that point.
# NOTE(review): presumably this is what lets `utils` resolve — confirm.
if PROJECT_PATH not in sys.path:
    sys.path.append(PROJECT_PATH)

# Wildcard import. Names used later in this notebook that are not defined here
# (configure_spark, BRONZE_ORDERS, BRONZE_ORDERS_TABLE, CATALOG_NAME,
# BRONZE_SCHEMA) presumably come from `config` — TODO confirm, and consider
# importing them explicitly so their origin is visible to readers.
from config import *
from utils import BronzeIngestion
# Wildcard import that supplies StructType / StructField and the *Type classes
# used by the schema cell.
from pyspark.sql.types import *
from pyspark.sql.functions import col, year, month

# `spark` is not created anywhere in this notebook; it is assumed to be a
# SparkSession injected by the notebook runtime — TODO confirm.
# NOTE(review): what configure_spark sets on the session is not visible here.
configure_spark(spark)

In [0]:
# Explicit schema for the ORDERS source, passed to `ingestion.read(schema=...)`.
# Built field by field with the StructType builder; every column is declared
# nullable, and the field order below is the column order of the schema.
orders_schema = (
    StructType()
    .add("order_id", StringType(), nullable=True)
    .add("customer_id", IntegerType(), nullable=True)
    .add("seller_id", IntegerType(), nullable=True)
    .add("order_date", TimestampType(), nullable=True)
    .add("order_total_price", DoubleType(), nullable=True)
    .add("payment_method", StringType(), nullable=True)
)

In [0]:
# Bronze ingestion of ORDERS: check the source, read it with the explicit
# schema, derive the partition columns, write the Iceberg table, then report.
ingestion = BronzeIngestion(
    spark=spark,
    table_display_name="ORDERS",
    source_path=BRONZE_ORDERS,
    target_table=BRONZE_ORDERS_TABLE,
)

# NOTE(review): check_source presumably verifies that the source path is
# reachable via dbutils before reading — confirm against BronzeIngestion.
ingestion.check_source(dbutils)
ingestion.read(schema=orders_schema)

# Derive the year/month partition columns from the order timestamp
# (year/month partitioning is intended for time-series queries).
order_ts = col("order_date")
ingestion.df = (
    ingestion.df
    .withColumn("order_year", year(order_ts))
    .withColumn("order_month", month(order_ts))
)

# The partition column names must match the two columns derived above.
ingestion.write_iceberg(
    catalog_name=CATALOG_NAME,
    bronze_schema=BRONZE_SCHEMA,
    partition_cols=["order_year", "order_month"],
)

ingestion.show_table_details()