Create Lakehouse Tables for the retail Schema

In [None]:
# ===========================
# SETUP CELL - Run First
# ===========================

from pyspark.sql import SparkSession
from pyspark.sql.types import *
from pyspark.sql import DataFrame

# Name of the schema (database) that will hold every table created below
SCHEMA_NAME = "retail"

# Get the active Spark session, or start one under this application name
spark = (
    SparkSession.builder
    .appName("RetailSchemaCreation")
    .getOrCreate()
)

# Make sure the target schema exists before any table is written into it
print("🏗️ Setting up retail schema...")
spark.sql("CREATE DATABASE IF NOT EXISTS " + SCHEMA_NAME)
print("✅ " + SCHEMA_NAME + " schema ready!")

# Helper function to create MANAGED tables (Fabric Lakehouse approach)
def create_table(table_name: str, schema: StructType, description: str = "") -> bool:
    """
    Create a MANAGED Delta table in the retail schema (Fabric Lakehouse compatible).

    An empty DataFrame carrying ``schema`` is written with ``saveAsTable`` and no
    explicit path; the resulting table structure is then printed via ``DESCRIBE``.

    Args:
        table_name: Name of the table (without the schema prefix)
        schema: StructType schema definition
        description: Optional description, included in the progress log line

    Returns:
        True when the table was written and described without raising;
        False when any step raised (the error is printed, not re-raised).
    """
    # Backtick-quote both identifier parts (doubling any embedded backtick) so
    # that names colliding with SQL keywords, e.g. "Order", always parse as
    # plain identifiers in both saveAsTable() and the DESCRIBE statement.
    qualified_name = ".".join(
        "`" + part.replace("`", "``") + "`" for part in (SCHEMA_NAME, table_name)
    )
    label = f"{table_name} ({description})" if description else table_name

    try:
        print(f"🏗️ Creating {label}...")

        # Create empty DataFrame with schema
        empty_df = spark.createDataFrame([], schema)

        # Write as MANAGED Delta table (no path specification).
        # NOTE: mode("overwrite") replaces an existing table of the same name,
        # so re-running a cell discards any rows loaded since the previous run.
        (empty_df.write
         .format("delta")
         .mode("overwrite")
         .saveAsTable(qualified_name)
        )

        print(f"✅ {table_name} created successfully!")

        # Show table structure
        print(f"📋 {table_name} structure:")
        spark.sql(f"DESCRIBE {qualified_name}").show(truncate=False)

        return True

    except Exception as e:
        # Best-effort by design: report the failure and let the notebook continue
        print(f"❌ Error creating {table_name}: {e}")
        return False

# Tell the notebook user that setup finished and how to use the helper
print(
    "\n🎯 Setup complete! Ready to create MANAGED tables...",
    "📝 Use create_table(table_name, schema) in subsequent cells",
    sep="\n",
)

In [None]:
# ===========================
# TABLE 1: CustomerAccount
# ===========================

# Schema for the CustomerAccount table; only CustomerAccountId is declared
# non-nullable, every other column accepts NULL.
customer_account_schema = StructType([
    StructField("CustomerAccountId", StringType(), False),  # NOT NULL
    StructField("CustomerAccountName", StringType(), True),
    StructField("CustomerAccountOpenedDate", DateType(), True),
    StructField("CustomerAccountClosedDate", DateType(), True),
    StructField("GlobalServiceRelationshipNumber", DecimalType(18, 1), True),
    StructField("CustomerId", StringType(), True),
    StructField("ResponsibilityCenterId", StringType(), True),
    StructField("SubaccountOfCustomerAccountId", StringType(), True),
    StructField("LedgerId", StringType(), True),
    StructField("LedgerAccountNumber", StringType(), True),
    StructField("IsoCurrencyCode", StringType(), True),
    StructField("CustomerAccountTypeId", StringType(), True),
    StructField("CustomerAccountApplicationNumber", IntegerType(), True),
])

# Create the table using our helper function
create_table("CustomerAccount", customer_account_schema, "Customer account information")

# Verify the table was created. The schema name comes from SCHEMA_NAME rather
# than a hard-coded literal so the check follows the configured schema.
print("\n🔍 Verification:")
spark.sql(f"SHOW TABLES IN {SCHEMA_NAME}").show()

In [None]:
# ===========================
# TABLE 2: CustomerAccountEmail
# ===========================

# CustomerAccountEmail: EmailAddress and PeriodStartTimestamp are required
customer_account_email_schema = (
    StructType()
    .add("CustomerAccountId", StringType())
    .add("EmailTypeId", StringType())
    .add("EmailAddress", StringType(), nullable=False)
    .add("PeriodStartTimestamp", TimestampType(), nullable=False)
    .add("PeriodEndTimestamp", TimestampType())
    .add("PreferenceId", StringType())
    .add("CustomerAccountEmailNote", StringType())
)

create_table(
    "CustomerAccountEmail",
    schema=customer_account_email_schema,
    description="Customer account email addresses",
)

In [None]:
# ===========================
# TABLE 3: CustomerAccountLocation
# ===========================

# CustomerAccountLocation: only PeriodStartTimestamp is required
customer_account_location_schema = (
    StructType()
    .add("CustomerAccountId", StringType())
    .add("LocationTypeId", StringType())
    .add("LocationId", StringType())
    .add("PeriodStartTimestamp", TimestampType(), nullable=False)
    .add("PeriodEndTimestamp", TimestampType())
    .add("GlobalLocationNumber", DecimalType(13, 1))
    .add("PreferenceId", StringType())
    .add("CustomerAccountLocationNote", StringType())
)

create_table(
    "CustomerAccountLocation",
    schema=customer_account_location_schema,
    description="Customer account locations",
)

In [None]:
# ===========================
# TABLE 4: CustomerAccountTelephoneNumber
# ===========================

# CustomerAccountTelephoneNumber: TelephoneNumber and PeriodStartTimestamp are required
# NOTE(review): TelephoneNumber is a DECIMAL(15,1), not a string — confirm this
# matches the upstream data model before loading data.
customer_account_telephone_schema = (
    StructType()
    .add("CustomerAccountId", StringType())
    .add("TelephoneNumberTypeId", StringType())
    .add("TelephoneNumber", DecimalType(15, 1), nullable=False)
    .add("PeriodStartTimestamp", TimestampType(), nullable=False)
    .add("PeriodEndTimestamp", TimestampType())
    .add("PreferenceId", StringType())
    .add("TelephoneNumberNote", StringType())
    .add("LocationId", StringType())
    .add("TextCapableIndicator", BooleanType())
)

create_table(
    "CustomerAccountTelephoneNumber",
    schema=customer_account_telephone_schema,
    description="Customer account telephone numbers",
)

In [None]:
# ===========================
# TABLE 5: CustomerGroup
# ===========================

# CustomerGroup: only CustomerGroupId is required
customer_group_schema = (
    StructType()
    .add("CustomerGroupId", StringType(), nullable=False)
    .add("CustomerGroupName", StringType())
    .add("CustomerGroupDescription", StringType())
    .add("ClassificationAgencyPartyId", StringType())
)

create_table(
    "CustomerGroup",
    schema=customer_group_schema,
    description="Customer group information",
)

In [None]:
# ===========================
# TABLE 6: CustomerLocation
# ===========================

# CustomerLocation: only PeriodStartTimestamp is required
customer_location_schema = (
    StructType()
    .add("CustomerId", StringType())
    .add("LocationTypeId", StringType())
    .add("LocationId", StringType())
    .add("PeriodStartTimestamp", TimestampType(), nullable=False)
    .add("PeriodEndTimestamp", TimestampType())
    .add("GlobalLocationNumber", DecimalType(13, 1))
    .add("PreferenceId", StringType())
    .add("CustomerLocationNote", StringType())
)

create_table(
    "CustomerLocation",
    schema=customer_location_schema,
    description="Customer locations",
)

In [None]:
# ===========================
# TABLE 7: CustomerName
# ===========================

# CustomerName: only CustomerNameId is required
customer_name_schema = (
    StructType()
    .add("CustomerId", StringType())
    .add("CustomerNameId", StringType(), nullable=False)
    .add("NameUsageId", StringType())
    .add("StartDate", DateType())
    .add("EndDate", DateType())
    .add("CustomerNameNote", StringType())
)

create_table(
    "CustomerName",
    schema=customer_name_schema,
    description="Customer names",
)

In [None]:
# ===========================
# TABLE 8: CustomerRelationshipType
# ===========================

# CustomerRelationshipType lookup: required id plus optional name/description
customer_relationship_type_schema = (
    StructType()
    .add("CustomerRelationshipTypeId", StringType(), nullable=False)
    .add("CustomerRelationshipTypeName", StringType())
    .add("CustomerRelationshipTypeDescription", StringType())
)

create_table(
    "CustomerRelationshipType",
    schema=customer_relationship_type_schema,
    description="Customer relationship types",
)

In [None]:
# ===========================
# TABLE 9: CustomerStatusType
# ===========================

# CustomerStatusType lookup: required id plus optional name/description
customer_status_type_schema = (
    StructType()
    .add("CustomerStatusTypeId", StringType(), nullable=False)
    .add("CustomerStatusTypeName", StringType())
    .add("CustomerStatusTypeDescription", StringType())
)

create_table(
    "CustomerStatusType",
    schema=customer_status_type_schema,
    description="Customer status types",
)

In [None]:
# ===========================
# TABLE 10: CustomerTelephoneNumber
# ===========================

# CustomerTelephoneNumber: TelephoneNumber and PeriodStartTimestamp are required
# NOTE(review): TelephoneNumber is a DECIMAL(15,1), not a string — confirm this
# matches the upstream data model before loading data.
customer_telephone_schema = (
    StructType()
    .add("CustomerId", StringType())
    .add("TelephoneNumberTypeId", StringType())
    .add("TelephoneNumber", DecimalType(15, 1), nullable=False)
    .add("PeriodStartTimestamp", TimestampType(), nullable=False)
    .add("PeriodEndTimestamp", TimestampType())
    .add("PreferenceId", StringType())
    .add("TelephoneNumberNote", StringType())
    .add("LocationId", StringType())
    .add("TextCapableIndicator", BooleanType())
)

create_table(
    "CustomerTelephoneNumber",
    schema=customer_telephone_schema,
    description="Customer telephone numbers",
)

In [None]:
# ===========================
# TABLE 11: CustomerTradeName
# ===========================

# CustomerTradeName: only PeriodStartDate is required
customer_trade_name_schema = (
    StructType()
    .add("CustomerId", StringType())
    .add("TradeNameId", StringType())
    .add("PeriodStartDate", DateType(), nullable=False)
    .add("PeriodEndDate", DateType())
    .add("CustomerTradeNameNote", StringType())
)

create_table(
    "CustomerTradeName",
    schema=customer_trade_name_schema,
    description="Customer trade names",
)

In [None]:
# ===========================
# TABLE 12: CustomerType
# ===========================

# CustomerType lookup: required id plus optional name/description
customer_type_schema = (
    StructType()
    .add("CustomerTypeId", StringType(), nullable=False)
    .add("CustomerTypeName", StringType())
    .add("CustomerTypeDescription", StringType())
)

create_table(
    "CustomerType",
    schema=customer_type_schema,
    description="Customer types",
)

In [None]:
# ===========================
# TABLE 13: HouseholdLocation
# ===========================

# HouseholdLocation: only PeriodStartDate is required
household_location_schema = (
    StructType()
    .add("HouseholdId", StringType())
    .add("LocationId", StringType())
    .add("PeriodStartDate", DateType(), nullable=False)
    .add("PeriodEndDate", DateType())
    .add("HousingUnitTypeId", StringType())
)

create_table(
    "HouseholdLocation",
    schema=household_location_schema,
    description="Household locations",
)

In [None]:
# ===========================
# TABLE 14: IndividualCustomer
# ===========================

# IndividualCustomer: every column is nullable
individual_customer_schema = (
    StructType()
    .add("CustomerId", StringType())
    .add("IndividualCustomerName", StringType())
    .add("DateOfBirth", DateType())
    .add("DateOfDeath", DateType())
    .add("SexId", StringType())
    .add("GenderId", StringType())
    .add("EthnicCategoryId", StringType())
    .add("RacialCategoryId", StringType())
)

create_table(
    "IndividualCustomer",
    schema=individual_customer_schema,
    description="Individual customer information",
)

In [None]:
# ===========================
# TABLE 15: Invoice
# ===========================

# Invoice header: only InvoiceId is required; monetary totals are DECIMAL(18,2)
# NOTE(review): telephone/fax numbers are DECIMAL(15,1), not strings — confirm
# this matches the upstream data model.
invoice_schema = (
    StructType()
    .add("InvoiceId", StringType(), nullable=False)
    .add("CustomerAccountId", StringType())
    .add("InvoiceDate", DateType())
    .add("InvoiceToName", StringType())
    .add("InvoiceToPartyId", StringType())
    .add("InvoiceToLocationId", StringType())
    .add("InvoiceToTelephoneNumber", DecimalType(15, 1))
    .add("InvoiceToFaxNumber", DecimalType(15, 1))
    .add("InvoiceToEmailAddress", StringType())
    .add("InvoiceNote", StringType())
    .add("TotalInvoiceProductAmount", DecimalType(18, 2))
    .add("TotalInvoiceChargesAmount", DecimalType(18, 2))
    .add("TotalInvoiceAdjustmentsAmount", DecimalType(18, 2))
    .add("TotalInvoiceTaxesAmount", DecimalType(18, 2))
    .add("TotalInvoiceAmount", DecimalType(18, 2))
    .add("InvoiceModeId", StringType())
    .add("IsoCurrencyCode", StringType())
    .add("InvoiceStatusId", StringType())
    .add("IsoLanguageId", StringType())
    .add("OrderId", StringType())
)

create_table(
    "Invoice",
    schema=invoice_schema,
    description="Invoice information",
)

In [None]:
# ===========================
# TABLE 16: InvoiceLine
# ===========================

# InvoiceLine: only InvoiceLineNumber is required; quantities/amounts are DECIMAL(18,2)
invoice_line_schema = (
    StructType()
    .add("InvoiceId", StringType())
    .add("InvoiceLineNumber", IntegerType(), nullable=False)
    .add("Quantity", DecimalType(18, 2))
    .add("UnitPriceAmount", DecimalType(18, 2))
    .add("SalesPriceAmount", DecimalType(18, 2))
    .add("InvoiceLineItemNote", StringType())
    .add("ProductId", StringType())
    .add("ItemSku", StringType())
    .add("TotalProductInvoiceAmount", DecimalType(18, 2))
    .add("ChargeId", StringType())
    .add("InvoiceLineChargeAmount", DecimalType(18, 2))
    .add("InvoiceLineAdjustmentsAmount", DecimalType(18, 2))
    .add("OrderLineNumber", IntegerType())
    .add("IsoCurrencyCode", StringType())
    .add("InvoiceLineTypeId", StringType())
    .add("OrderId", StringType())
)

create_table(
    "InvoiceLine",
    schema=invoice_line_schema,
    description="Invoice line items",
)

In [None]:
# ===========================
# TABLE 17: Location
# ===========================

# Location: only LocationId is required
# NOTE(review): LocationZipCode is a DECIMAL(11,1), not a string — confirm this
# matches the upstream data model.
location_schema = (
    StructType()
    .add("LocationId", StringType(), nullable=False)
    .add("LocationName", StringType())
    .add("LocationDescription", StringType())
    .add("LocationAddressLine1", StringType())
    .add("LocationAddressLine2", StringType())
    .add("LocationCity", StringType())
    .add("LocationStateId", StringType())
    .add("LocationZipCode", DecimalType(11, 1))
    .add("LocationNote", StringType())
    .add("LocationLatitude", DecimalType(10, 7))
    .add("LocationLongitude", DecimalType(10, 7))
    .add("LocationDatum", StringType())
    .add("LocationElevation", DecimalType(18, 8))
    .add("LocationElevationUnitOfMeasureId", StringType())
    .add("GlobalLocationNumber", DecimalType(13, 1))
    .add("TimezoneId", StringType())
    .add("DaylightSavingsTimeObservedIndicator", BooleanType())
    .add("CountryId", StringType())
    .add("SubdivisionId", StringType())
)

create_table(
    "Location",
    schema=location_schema,
    description="Location information",
)

In [None]:
# ===========================
# TABLE 18: Order
# ===========================

# Order header: only OrderId is required; monetary totals are DECIMAL(18,2)
# NOTE(review): telephone/fax numbers are DECIMAL(15,1), not strings — confirm
# this matches the upstream data model.
order_schema = (
    StructType()
    .add("OrderId", StringType(), nullable=False)
    .add("SalesChannelId", StringType())
    .add("OrderDate", DateType())
    .add("OrderFromPartyId", StringType())
    .add("OrderFromName", StringType())
    .add("OrderFromLocationId", StringType())
    .add("OrderToName", StringType())
    .add("OrderToPartyId", StringType())
    .add("OrderToLocationId", StringType())
    .add("OrderToTelephoneNumber", DecimalType(15, 1))
    .add("OrderToFaxNumber", DecimalType(15, 1))
    .add("OrderToEmailAddress", StringType())
    .add("RequestedShipToName", StringType())
    .add("RequestedShipToPartyId", StringType())
    .add("RequestedShipToLocationId", StringType())
    .add("RequestedShipToTelephoneNumber", DecimalType(15, 1))
    .add("RequestedShipToFaxNumber", DecimalType(15, 1))
    .add("RequestedShipToEmailAddress", StringType())
    .add("CustomerAccountId", StringType())
    .add("OrderTypeId", StringType())
    .add("OrderModeId", StringType())
    .add("OrderStatusId", StringType())
    .add("OrderPriorityId", StringType())
    .add("DunningId", StringType())
    .add("TotalOrderProductAmount", DecimalType(18, 2))
    .add("TotalOrderChargesAmount", DecimalType(18, 2))
    .add("TotalOrderAdjustmentsAmount", DecimalType(18, 2))
    .add("TotalOrderTaxesAmount", DecimalType(18, 2))
    .add("TotalOrderAmount", DecimalType(18, 2))
    .add("IsoCurrencyCode", StringType())
    .add("IsoLanguageId", StringType())
    .add("OrderNote", StringType())
    .add("RequestedDeliveryDate", DateType())
    .add("OrderTakenByEmployeeId", StringType())
    .add("SalesRepresentativeId", StringType())
    .add("PromotionId", StringType())
)

create_table(
    "Order",
    schema=order_schema,
    description="Order information",
)

In [None]:
# ===========================
# TABLE 19: OrderLine
# ===========================

# OrderLine: only OrderLineNumber is required; quantities/amounts are DECIMAL(18,2)
order_line_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("OrderLineNumber", IntegerType(), nullable=False)
    .add("ProductId", StringType())
    .add("ItemSku", StringType())
    .add("RequestedQuantity", DecimalType(18, 2))
    .add("UnitPriceAmount", DecimalType(18, 2))
    .add("SalesPriceAmount", DecimalType(18, 2))
    .add("OrderLineItemNote", StringType())
    .add("ChargeId", StringType())
    .add("OrderLineChargeAmount", DecimalType(18, 2))
    .add("OrderLineAdjustmentsAmount", DecimalType(18, 2))
    .add("TotalProductOrderAmount", DecimalType(18, 2))
    .add("IsoCurrencyCode", StringType())
    .add("OrderLineTypeId", StringType())
    .add("RequestedDeliveryDate", DateType())
)

create_table(
    "OrderLine",
    schema=order_line_schema,
    description="Order line items",
)

In [None]:
# ===========================
# TABLE 20: Product
# ===========================

# Product: only ProductId is required. Physical measures use DECIMAL(18,8);
# prices, costs and order quantities use DECIMAL(18,2).
product_schema = (
    StructType()
    .add("ProductId", StringType(), nullable=False)
    .add("ProductName", StringType())
    .add("ProductDescription", StringType())
    .add("ProductSize", StringType())
    .add("ProductColor", StringType())
    .add("ItemSku", StringType())
    .add("ManufacturerProductName", StringType())
    .add("ManufacturerProductDescription", StringType())
    .add("ManufacturerProductId", StringType())
    .add("ProductCategoryId", StringType())
    .add("ProductSubcategoryId", StringType())
    .add("BrandId", StringType())
    .add("ProductModelId", StringType())
    .add("ProductNote", StringType())
    .add("RetailUnitOfMeasureId", StringType())
    .add("ProductLength", DecimalType(18, 8))
    .add("ProductWidth", DecimalType(18, 8))
    .add("ProductHeight", DecimalType(18, 8))
    .add("ProductDepth", DecimalType(18, 8))
    .add("ProductDimensionUnitOfMeasureId", StringType())
    .add("ProductWeight", DecimalType(18, 8))
    .add("ProductWeightUnitOfMeasureId", StringType())
    .add("ProductIntroductionDate", DateType())
    .add("ProductDiscontinuationDate", DateType())
    .add("ProductMsrpAmount", DecimalType(18, 2))
    .add("ProductCostAmount", DecimalType(18, 2))
    .add("IsoCurrencyCode", StringType())
    .add("ProductListPriceAmount", DecimalType(18, 2))
    .add("ProductStandardCostAmount", DecimalType(18, 2))
    .add("ProductMinimumOrderQuantity", DecimalType(18, 2))
    .add("ProductMaximumOrderQuantity", DecimalType(18, 2))
    .add("ProductImageNote", StringType())
    .add("ProductImageURL", StringType())
    .add("ProductVideoNote", StringType())
    .add("ProductVideoURL", StringType())
    .add("ProductDocumentNote", StringType())
    .add("ProductDocumentURL", StringType())
    .add("AssociatedServiceId", StringType())
    .add("TaxCategoryId", StringType())
    .add("IsoLanguageId", StringType())
)

create_table(
    "Product",
    schema=product_schema,
    description="Product information",
)

In [None]:
# ===========================
# TABLE 21: OrderActivityType
# ===========================

# OrderActivityType lookup: required id plus optional name/description
order_activity_type_schema = (
    StructType()
    .add("OrderActivityTypeId", StringType(), nullable=False)
    .add("OrderActivityTypeName", StringType())
    .add("OrderActivityTypeDescription", StringType())
)

create_table(
    "OrderActivityType",
    schema=order_activity_type_schema,
    description="Order activity types",
)

# ===========================
# TABLE 22: OrderAdjustment
# ===========================

# OrderAdjustment: every column is nullable
order_adjustment_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("AdjustmentId", StringType())
    .add("OrderAdjustmentTimestamp", TimestampType())
    .add("OrderAdjustmentAmount", DecimalType(18, 2))
    .add("OrderAdjustmentNote", StringType())
    .add("OrderAdjustmentReasonId", StringType())
)

create_table(
    "OrderAdjustment",
    schema=order_adjustment_schema,
    description="Order adjustments",
)

# ===========================
# TABLE 23: OrderCharge
# ===========================

# OrderCharge: every column is nullable
order_charge_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("OrderChargeTypeId", StringType())
    .add("OrderChargeAmount", DecimalType(18, 2))
    .add("ChargeByPartyId", StringType())
    .add("OrderChargeNote", StringType())
)

create_table(
    "OrderCharge",
    schema=order_charge_schema,
    description="Order charges",
)

# ===========================
# TABLE 24: OrderChargeType
# ===========================

# OrderChargeType lookup: required id plus optional name/description
order_charge_type_schema = (
    StructType()
    .add("OrderChargeTypeId", StringType(), nullable=False)
    .add("OrderChargeTypeName", StringType())
    .add("OrderChargeTypeDescription", StringType())
)

create_table(
    "OrderChargeType",
    schema=order_charge_type_schema,
    description="Order charge types",
)

# ===========================
# TABLE 25: OrderClassification
# ===========================

# OrderClassification: every column is nullable
order_classification_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("ClassificationId", StringType())
    .add("OrderClassificationNote", StringType())
)

create_table(
    "OrderClassification",
    schema=order_classification_schema,
    description="Order classifications",
)

# ===========================
# TABLE 26: OrderCondition
# ===========================

# OrderCondition lookup: required id plus optional name/description
order_condition_schema = (
    StructType()
    .add("OrderConditionId", StringType(), nullable=False)
    .add("OrderConditionName", StringType())
    .add("OrderConditionDescription", StringType())
)

create_table(
    "OrderCondition",
    schema=order_condition_schema,
    description="Order conditions",
)

# ===========================
# TABLE 27: OrderDeliveryTerm
# ===========================

# OrderDeliveryTerm: every column is nullable
order_delivery_term_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("DeliveryTermId", StringType())
    .add("OrderDeliveryTermNote", StringType())
)

create_table(
    "OrderDeliveryTerm",
    schema=order_delivery_term_schema,
    description="Order delivery terms",
)

# ===========================
# TABLE 28: OrderFinanceTerm
# ===========================

# OrderFinanceTerm: every column is nullable
order_finance_term_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("FinanceTermId", StringType())
    .add("OrderFinanceTermNote", StringType())
)

create_table(
    "OrderFinanceTerm",
    schema=order_finance_term_schema,
    description="Order finance terms",
)

# ===========================
# TABLE 29: OrderHold
# ===========================

# OrderHold: only OrderHoldPlacedTimestamp is required
order_hold_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("OrderHoldPlacedTimestamp", TimestampType(), nullable=False)
    .add("OrderHoldReleasedTimestamp", TimestampType())
    .add("HoldReasonId", StringType())
    .add("HoldTypeId", StringType())
    .add("OrderHoldNote", StringType())
)

create_table(
    "OrderHold",
    schema=order_hold_schema,
    description="Order holds",
)

# ===========================
# TABLE 30: OrderLanguageUsage
# ===========================

# OrderLanguageUsage: every column is nullable
order_language_usage_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("IsoLanguageId", StringType())
    .add("LanguageUsageId", StringType())
    .add("OrderLanguageUsageNote", StringType())
)

create_table(
    "OrderLanguageUsage",
    schema=order_language_usage_schema,
    description="Order language usage",
)

In [None]:
# ===========================
# TABLE 31: OrderLineAdjustment
# ===========================

# OrderLineAdjustment: every column is nullable
order_line_adjustment_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("OrderLineNumber", IntegerType())
    .add("AdjustmentId", StringType())
    .add("OrderLineAdjustmentTimestamp", TimestampType())
    .add("OrderLineAdjustmentAmount", DecimalType(18, 2))
    .add("OrderLineAdjustmentQuantity", DecimalType(18, 2))
    .add("OrderLineAdjustmentNote", StringType())
    .add("OrderLineAdjustmentReasonId", StringType())
)

create_table(
    "OrderLineAdjustment",
    schema=order_line_adjustment_schema,
    description="Order line adjustments",
)

# ===========================
# TABLE 32: OrderLineAdjustmentReason
# ===========================

# OrderLineAdjustmentReason lookup: required id plus optional name/description
order_line_adjustment_reason_schema = (
    StructType()
    .add("OrderLineAdjustmentReasonId", StringType(), nullable=False)
    .add("OrderLineAdjustmentReasonName", StringType())
    .add("OrderLineAdjustmentReasonDescription", StringType())
)

create_table(
    "OrderLineAdjustmentReason",
    schema=order_line_adjustment_reason_schema,
    description="Order line adjustment reasons",
)

# ===========================
# TABLE 33: OrderLineCharge
# ===========================

# OrderLineCharge: every column is nullable
order_line_charge_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("OrderLineNumber", IntegerType())
    .add("OrderChargeTypeId", StringType())
    .add("OrderLineChargeAmount", DecimalType(18, 2))
    .add("ChargeByPartyId", StringType())
    .add("OrderLineChargeNote", StringType())
)

create_table(
    "OrderLineCharge",
    schema=order_line_charge_schema,
    description="Order line charges",
)

# ===========================
# TABLE 34: OrderLineHold
# ===========================

# OrderLineHold: only OrderLineHoldPlacedTimestamp is required
order_line_hold_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("OrderLineNumber", IntegerType())
    .add("OrderLineHoldPlacedTimestamp", TimestampType(), nullable=False)
    .add("OrderLineHoldReleasedTimestamp", TimestampType())
    .add("HoldReasonId", StringType())
    .add("HoldTypeId", StringType())
    .add("OrderLineHoldNote", StringType())
)

create_table(
    "OrderLineHold",
    schema=order_line_hold_schema,
    description="Order line holds",
)

# ===========================
# TABLE 35: OrderLineStatus
# ===========================

# OrderLineStatus: only OrderLineStatusStartTimestamp is required
order_line_status_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("OrderLineNumber", IntegerType())
    .add("OrderLineStatusStartTimestamp", TimestampType(), nullable=False)
    .add("OrderLineStatusEndTimestamp", TimestampType())
    .add("OrderStatusTypeId", StringType())
)

create_table(
    "OrderLineStatus",
    schema=order_line_status_schema,
    description="Order line status",
)

# ===========================
# TABLE 36: OrderPartyRelationshipType
# ===========================

# OrderPartyRelationshipType lookup: required id plus optional name/description
order_party_relationship_type_schema = (
    StructType()
    .add("OrderPartyRelationshipTypeId", StringType(), nullable=False)
    .add("OrderPartyRelationshipTypeName", StringType())
    .add("OrderPartyRelationshipTypeDescription", StringType())
)

create_table(
    "OrderPartyRelationshipType",
    schema=order_party_relationship_type_schema,
    description="Order party relationship types",
)

# ===========================
# TABLE 37: OrderPayment
# ===========================

# OrderPayment: every column is nullable
order_payment_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("PaymentId", StringType())
    .add("AmountAppliedToOrder", DecimalType(18, 2))
)

create_table(
    "OrderPayment",
    schema=order_payment_schema,
    description="Order payments",
)

# ===========================
# TABLE 38: OrderProcessingStatus
# ===========================

# OrderProcessingStatus lookup: required id plus optional name/description
order_processing_status_schema = (
    StructType()
    .add("OrderProcessingStatusId", StringType(), nullable=False)
    .add("OrderProcessingStatusName", StringType())
    .add("OrderProcessingStatusDescription", StringType())
)

create_table(
    "OrderProcessingStatus",
    schema=order_processing_status_schema,
    description="Order processing status",
)

# ===========================
# TABLE 39: OrderRelatedParty
# ===========================

# OrderRelatedParty: every column is nullable
order_related_party_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("PartyId", StringType())
    .add("OrderPartyRelationshipTypeId", StringType())
    .add("OrderPartyRelationshipNote", StringType())
)

create_table(
    "OrderRelatedParty",
    schema=order_related_party_schema,
    description="Order related parties",
)

# ===========================
# TABLE 40: OrderSalesTerm
# ===========================

# OrderSalesTerm: every column is nullable
order_sales_term_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("SalesTermId", StringType())
    .add("OrderSalesTermNote", StringType())
)

create_table(
    "OrderSalesTerm",
    schema=order_sales_term_schema,
    description="Order sales terms",
)

# ===========================
# TABLE 41: OrderStatus
# ===========================

# OrderStatus: only OrderStatusStartTimestamp is required
order_status_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("OrderStatusStartTimestamp", TimestampType(), nullable=False)
    .add("OrderStatusEndTimestamp", TimestampType())
    .add("OrderStatusTypeId", StringType())
)

create_table(
    "OrderStatus",
    schema=order_status_schema,
    description="Order status",
)

# ===========================
# TABLE 42: OrderStatusType
# ===========================

# OrderStatusType lookup: required id plus optional name/description
order_status_type_schema = (
    StructType()
    .add("OrderStatusTypeId", StringType(), nullable=False)
    .add("OrderStatusTypeName", StringType())
    .add("OrderStatusTypeDescription", StringType())
)

create_table(
    "OrderStatusType",
    schema=order_status_type_schema,
    description="Order status types",
)

# ===========================
# TABLE 43: OrderType
# ===========================

# OrderType lookup: required id plus optional name/description
order_type_schema = (
    StructType()
    .add("OrderTypeId", StringType(), nullable=False)
    .add("OrderTypeName", StringType())
    .add("OrderTypeDescription", StringType())
)

create_table(
    "OrderType",
    schema=order_type_schema,
    description="Order types",
)

# ===========================
# TABLE 44: Party
# ===========================

# Party: only PartyId is required
party_schema = (
    StructType()
    .add("PartyId", StringType(), nullable=False)
    .add("PartyName", StringType())
    .add("PartyTypeId", StringType())
    .add("GlobalLocationNumber", DecimalType(13, 1))
)

create_table(
    "Party",
    schema=party_schema,
    description="Party information",
)

# ===========================
# TABLE 45: PartyLocation
# ===========================

# Only PeriodStartTimestamp is declared NOT NULL; every other column is
# nullable (the StructType.add() default).
#
# NOTE(review): DecimalType(13, 1) leaves 12 integer digits, so a 13-digit
# value in GlobalLocationNumber would not fit - confirm against the source
# model before loading data.
party_location_schema = (
    StructType()
    .add("PartyId", StringType())
    .add("LocationTypeId", StringType())
    .add("LocationId", StringType())
    .add("PeriodStartTimestamp", TimestampType(), nullable=False)
    .add("PeriodEndTimestamp", TimestampType())
    .add("GlobalLocationNumber", DecimalType(13, 1))
    .add("PreferenceId", StringType())
    .add("PartyLocationNote", StringType())
)

create_table(
    table_name="PartyLocation",
    schema=party_location_schema,
    description="Party locations",
)

# ===========================
# TABLE 46: PartyTelephoneNumber
# ===========================

# TelephoneNumber and PeriodStartTimestamp are declared NOT NULL; every
# other column is nullable (the StructType.add() default).
#
# NOTE(review): TelephoneNumber is a DecimalType(15, 1), i.e. at most 14
# integer digits, and a decimal cannot keep leading zeros or a "+" prefix -
# confirm this matches the numbers that will be loaded.
party_telephone_number_schema = (
    StructType()
    .add("PartyId", StringType())
    .add("TelephoneNumberTypeId", StringType())
    .add("TelephoneNumber", DecimalType(15, 1), nullable=False)
    .add("PeriodStartTimestamp", TimestampType(), nullable=False)
    .add("PeriodEndTimestamp", TimestampType())
    .add("PreferenceId", StringType())
    .add("TelephoneNumberNote", StringType())
    .add("LocationId", StringType())
    .add("TextCapableIndicator", BooleanType())
)

create_table(
    table_name="PartyTelephoneNumber",
    schema=party_telephone_number_schema,
    description="Party telephone numbers",
)

# ===========================
# TABLE 47: Retailer
# ===========================

# RetailerId is declared NOT NULL; every other column is nullable
# (the StructType.add() default).
#
# NOTE(review): the telephone and fax columns are DecimalType(15, 1), i.e.
# at most 14 integer digits, and a decimal cannot keep leading zeros or a
# "+" prefix - confirm this matches the numbers that will be loaded.
retailer_schema = (
    StructType()
    .add("RetailerId", StringType(), nullable=False)
    .add("RetailerName", StringType())
    .add("LocationId", StringType())
    .add("RetailerTelephoneNumber", DecimalType(15, 1))
    .add("RetailerFaxNumber", DecimalType(15, 1))
    .add("RetailerEmailAddress", StringType())
    .add("PartyId", StringType())
    .add("RetailerNote", StringType())
)

create_table(
    table_name="Retailer",
    schema=retailer_schema,
    description="Retailer information",
)

# ===========================
# TABLE 48: SalesOrderCondition
# ===========================

# Built field by field; StructType.add() declares a column nullable unless
# nullable=False is passed, so all three string columns are nullable.
sales_order_condition_schema = (
    StructType()
    .add("OrderId", StringType())
    .add("OrderConditionId", StringType())
    .add("SalesOrderConditionNote", StringType())
)

create_table(
    table_name="SalesOrderCondition",
    schema=sales_order_condition_schema,
    description="Sales order conditions",
)

# ===========================
# TABLE 49: UsaLocation
# ===========================

# AddressLine1 and AddressLine2 are declared NOT NULL; every other column
# is nullable (the StructType.add() default).
#
# NOTE(review): AddressLine2 being NOT NULL is unusual for an optional
# second address line - confirm against the source model.
# NOTE(review): ZipCode is a DecimalType(18, 2); a decimal cannot keep
# leading zeros, so codes that start with "0" would lose them - confirm
# this is intended.
usa_location_schema = (
    StructType()
    .add("LocationId", StringType())
    .add("AddressLine1", StringType(), nullable=False)
    .add("AddressLine2", StringType(), nullable=False)
    .add("City", StringType())
    .add("StateId", StringType())
    .add("ZipCode", DecimalType(18, 2))
    .add("CensusDivisionId", StringType())
    .add("CensusRegionId", StringType())
    .add("CensusTrackBlockNumberingAreaId", StringType())
    .add("BlockGroupId", StringType())
    .add("BlockId", StringType())
    .add("FipsCode", StringType())
    .add("CongressionalDistrictId", StringType())
    .add("TrafficAnalysisZoneId", StringType())
    .add("UrbanizedAreaId", StringType())
    .add("StateLegislativeDistrictId", StringType())
    .add("SchoolDistrictId", StringType())
    .add("VotingDistrictId", StringType())
    .add("CountyCode", StringType())
)

create_table(
    table_name="UsaLocation",
    schema=usa_location_schema,
    description="USA location details",
)

# ===========================
# TABLE 50: UsLocation
# ===========================

# LocationId is declared NOT NULL; name and county code stay nullable
# (the StructType.add() default).
us_location_schema = (
    StructType()
    .add("LocationId", StringType(), nullable=False)
    .add("LocationName", StringType())
    .add("CountyCode", StringType())
)

create_table(
    table_name="UsLocation",
    schema=us_location_schema,
    description="US location information",
)

In [None]:
# Final summary cell: report how many table schemas were defined in this
# session and list the tables that actually exist in the target schema.
print("="*80)
print("🎉 RETAIL SCHEMA CREATION COMPLETE!")
print("="*80)

# Count schema definitions by naming convention (every table schema variable
# in this notebook ends in "_schema"). This is the number of schemas
# *defined*, not verified creations: the boolean returned by create_table()
# is not collected anywhere, so the line below must not claim success.
defined_schema_count = sum(1 for name in globals() if name.endswith("_schema"))
print(f"✅ Defined {defined_schema_count} table schemas for the '{SCHEMA_NAME}' schema")
print("✅ All tables are created as MANAGED tables using Delta format")
print("✅ Schema definitions preserve original data types and constraints")
print("✅ Ready for data loading and analytics workloads")
print("="*80)

# Show available tables in the target schema. SCHEMA_NAME (set in the setup
# cell) is used instead of a hard-coded name so this listing always matches
# the schema the tables were written to.
try:
    retail_tables = spark.sql(f"SHOW TABLES IN {SCHEMA_NAME}").collect()
except Exception as e:
    # Best-effort listing: surface the real cause rather than hiding it, then
    # keep the original hint for the common "notebook not run yet" case.
    print(f"⚠️ Could not list tables in the {SCHEMA_NAME} schema: {e}")
    print(f"Note: Run this notebook to create all tables in the {SCHEMA_NAME} schema")
else:
    print(f"📊 Tables available in {SCHEMA_NAME} schema: {len(retail_tables)}")
    for table in retail_tables:
        print(f"   • {table.tableName}")

print("="*80)