Initialize Data

In [0]:
%run "../00-init/load-data"

Delta Table Setup

In [0]:
def prepare_source_table_schema(table_name):
    """Add the demo's tracking columns to ``table_name`` if they are missing.

    For each of ``en_route`` (BOOLEAN), ``delivered`` (BOOLEAN) and
    ``cookies`` (INT) that is not already present, the column is added with
    ``ALTER TABLE ... ADD COLUMN`` and then backfilled with an ``UPDATE``
    (``False`` for the booleans, ``Null`` for ``cookies``). Columns that
    already exist are left untouched, so re-running the cell is safe.

    Args:
        table_name: Name of the table to alter. It is interpolated directly
            into the SQL text, so it must be a trusted identifier.

    Raises:
        Exception: Any error raised while reading the schema or running the
            SQL statements is printed and then re-raised unchanged.
    """
    # (column name, SQL type, SQL literal used to backfill existing rows)
    required_columns = (
        ("en_route", "BOOLEAN", "False"),
        ("delivered", "BOOLEAN", "False"),
        ("cookies", "INT", "Null"),
    )

    print(f"Ensuring schema columns exist for {table_name}...")
    try:
        # Spark resolves column names case-insensitively by default, so a
        # table that already has e.g. "En_Route" must count as having the
        # column; otherwise ADD COLUMN would fail on a duplicate name.
        existing_columns = {name.lower() for name in spark.table(table_name).columns}

        for column, sql_type, backfill in required_columns:
            if column in existing_columns:
                continue
            spark.sql(f"ALTER TABLE {table_name} ADD COLUMN {column} {sql_type}")
            spark.sql(f"UPDATE {table_name} SET {column} = {backfill}")

        print("✓ Schema meets requirements.")

    except Exception as e:
        print(f"Error modifying source table. Error: {e}")
        # Bare raise keeps the original traceback intact.
        raise

# Name of the table that is prepared here and later used as the sync source.
source_table_name = "main.dbrx_12daysofdemos.gift_requests"

# Add any missing tracking columns before turning on the change data feed.
prepare_source_table_schema(source_table_name)

# Turn on the Delta change data feed table property for the source table.
enable_change_feed_sql = (
    f"ALTER TABLE {source_table_name} SET TBLPROPERTIES (delta.enableChangeDataFeed = true)"
)
spark.sql(enable_change_feed_sql)

Setup Lakebase using Databricks SDK

In [0]:
# Example: Create a Lakebase instance using the Databricks SDK
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.database import DatabaseInstance, DatabaseCatalog, SyncedDatabaseTable, SyncedTableSpec, NewPipelineSpec, SyncedTableSchedulingPolicy

# Describe the Lakebase instance to provision: its name and compute capacity
# (capacity values look like CU_1, CU_2, CU_4, etc.).
db_instance_config = DatabaseInstance(name="lakebase-demo", capacity="CU_2")

# Workspace client built with no explicit arguments; reused by later cells.
w = WorkspaceClient()

# Ask the workspace to create the Lakebase database instance described above.
instance = w.database.create_database_instance(db_instance_config)

print(f"✓ Instance created: {db_instance_config.name}")

In [0]:
# Describe the Unity Catalog catalog that will front the Lakebase instance:
#   name                   -> UC catalog to create
#   database_instance_name -> the database instance configured earlier
#   database_name          -> existing Postgres database to expose
catalog_config = DatabaseCatalog(
    name="lakebase_demo_catalog",
    database_instance_name=db_instance_config.name,
    database_name="databricks_postgres",
)

# Register the existing Postgres database as a UC catalog.
catalog = w.database.create_database_catalog(catalog_config)

print(f"✓ Created database catalog: {catalog_config.name}")

Create the Synced Lakebase Table

In [0]:
# Pipeline settings for the sync.
# NOTE(review): presumably where the sync pipeline keeps its storage — confirm.
sync_pipeline_spec = NewPipelineSpec(
    storage_catalog="main",
    storage_schema="dbrx_12daysofdemos",
)

# How the source Delta table is mirrored into Lakebase:
#   - request_id is the primary key column
#   - syncs use the TRIGGERED scheduling policy
#   - "timestamp" is the timeseries key (used for deduplication)
synced_table_spec = SyncedTableSpec(
    source_table_full_name=source_table_name,
    primary_key_columns=["request_id"],
    scheduling_policy=SyncedTableSchedulingPolicy.TRIGGERED,
    timeseries_key="timestamp",
    new_pipeline_spec=sync_pipeline_spec,
)

# The synced table lives in the "public" schema of the database catalog.
synced_table_config = SyncedDatabaseTable(
    name=catalog_config.name + ".public.gift_requests_synced_table",
    spec=synced_table_spec,
)

# Create the synced table in the database catalog.
synced_table = w.database.create_synced_database_table(synced_table_config)

print(f"✓ Created synced table: {synced_table_config.name}")