In [0]:
from pyspark.sql.types import (
    StructType,
    StructField,
    StringType,
    BooleanType,
    TimestampType
)

# Explicit schema for customers.json, built field by field.
# The nested JSON objects (address, preferences, _metadata) are modelled as
# nested structs. Both timestamp-like fields (created_at, ingestion_timestamp)
# are declared as plain strings, so nothing is parsed into a timestamp type here.
# NOTE(review): customer_id is the only field declared non-nullable; file-based
# readers may relax this to nullable on load — confirm whether an explicit
# not-null check is needed downstream.
customers_schema = (
    StructType()
    .add("customer_id", StringType(), nullable=False)
    .add("first_name", StringType(), nullable=True)
    .add("last_name", StringType(), nullable=True)
    .add("email", StringType(), nullable=True)
    .add("phone", StringType(), nullable=True)
    .add(
        "address",
        StructType()
        .add("street", StringType(), nullable=True)
        .add("city", StringType(), nullable=True)
        .add("state", StringType(), nullable=True)
        .add("zip_code", StringType(), nullable=True)
        .add("country", StringType(), nullable=True),
        nullable=True,
    )
    .add("loyalty_tier", StringType(), nullable=True)
    # ISO 8601 timestamp as string
    .add("created_at", StringType(), nullable=True)
    .add("is_active", BooleanType(), nullable=True)
    .add(
        "preferences",
        StructType()
        .add("newsletter", BooleanType(), nullable=True)
        .add("sms_alerts", BooleanType(), nullable=True)
        .add("preferred_contact", StringType(), nullable=True),
        nullable=True,
    )
    .add(
        "_metadata",
        StructType()
        .add("source_system", StringType(), nullable=True)
        .add("ingestion_timestamp", StringType(), nullable=True)
        .add("batch_id", StringType(), nullable=True),
        nullable=True,
    )
)

In [0]:
# Load the customers landing file using the explicit schema (no inference).
# multiLine is enabled so a single JSON record may span several lines.
df = spark.read.json(
    "dbfs:/Volumes/workspace/landingzone/orders_files/customers.json",
    schema=customers_schema,
    multiLine=True,
)

In [0]:
# Persist the loaded DataFrame as table bronze.customers; "overwrite" mode
# replaces whatever the table currently holds.
df.write.saveAsTable("bronze.customers", mode="overwrite")
