# 03 — Generate shifts + assignments (bronze)

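Creates upcoming shifts for `DAYS_SCHEDULE` consecutive days starting at a fixed base date (two 12-hour shifts per facility per day), and assignments with intentional gaps (a shift may be assigned fewer providers than it requires).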


In [None]:
%pip install faker==25.2.0


In [None]:
# Configuration (Databricks widgets)
# These widgets make the demo portable across workspaces/accounts.
# If you're running this outside a Databricks notebook, it will fall back to defaults.

DEFAULT_CATALOG = "rtpa_catalog"
DEFAULT_SCHEMA_REF = "credentialing_ref"
DEFAULT_SCHEMA_BRONZE = "credentialing_bronze"
DEFAULT_SCHEMA_SILVER = "credentialing_silver"
DEFAULT_SCHEMA_GOLD = "credentialing_gold"

DEFAULT_N_PROVIDERS = 200
DEFAULT_DAYS_SCHEDULE = 14
DEFAULT_SEED = 42


def _widget_int(raw, default, label):
    """Parse an integer widget value.

    Returns `default` when `raw` is empty/None, and also (with a printed
    warning) when `raw` is not a valid integer, so a typo in one numeric
    widget does not discard the other widget values.
    """
    text = str(raw or "").strip()
    if not text:
        return default
    try:
        return int(text)
    except ValueError:
        print(f"WARNING: widget '{label}' value {raw!r} is not an integer; using default {default}")
        return default


try:
    dbutils.widgets.text("catalog", DEFAULT_CATALOG, "Catalog")
    dbutils.widgets.text("schema_ref", DEFAULT_SCHEMA_REF, "Schema (ref)")
    dbutils.widgets.text("schema_bronze", DEFAULT_SCHEMA_BRONZE, "Schema (bronze)")
    dbutils.widgets.text("schema_silver", DEFAULT_SCHEMA_SILVER, "Schema (silver)")
    dbutils.widgets.text("schema_gold", DEFAULT_SCHEMA_GOLD, "Schema (gold)")

    dbutils.widgets.text("n_providers", str(DEFAULT_N_PROVIDERS), "N providers")
    dbutils.widgets.text("days_schedule", str(DEFAULT_DAYS_SCHEDULE), "Days schedule")
    dbutils.widgets.text("seed", str(DEFAULT_SEED), "Random seed")

    # An empty widget value falls back to the corresponding default.
    catalog = dbutils.widgets.get("catalog") or DEFAULT_CATALOG
    schema_ref = dbutils.widgets.get("schema_ref") or DEFAULT_SCHEMA_REF
    schema_bronze = dbutils.widgets.get("schema_bronze") or DEFAULT_SCHEMA_BRONZE
    schema_silver = dbutils.widgets.get("schema_silver") or DEFAULT_SCHEMA_SILVER
    schema_gold = dbutils.widgets.get("schema_gold") or DEFAULT_SCHEMA_GOLD

    # Only fetch the raw numeric strings here; they are parsed after the try
    # block so a malformed number cannot trigger the "no widgets" fallback.
    _raw_n_providers = dbutils.widgets.get("n_providers")
    _raw_days_schedule = dbutils.widgets.get("days_schedule")
    _raw_seed = dbutils.widgets.get("seed")
except Exception:
    # Deliberate best-effort: `dbutils` is missing or widgets are unavailable
    # (e.g. running outside a Databricks notebook), so use every default.
    catalog = DEFAULT_CATALOG
    schema_ref = DEFAULT_SCHEMA_REF
    schema_bronze = DEFAULT_SCHEMA_BRONZE
    schema_silver = DEFAULT_SCHEMA_SILVER
    schema_gold = DEFAULT_SCHEMA_GOLD

    _raw_n_providers = None
    _raw_days_schedule = None
    _raw_seed = None

N_PROVIDERS = _widget_int(_raw_n_providers, DEFAULT_N_PROVIDERS, "n_providers")
DAYS_SCHEDULE = _widget_int(_raw_days_schedule, DEFAULT_DAYS_SCHEDULE, "days_schedule")
SEED = _widget_int(_raw_seed, DEFAULT_SEED, "seed")


# Derived helpers
def fq(sch, tbl):
    """Return the fully qualified `<catalog>.<sch>.<tbl>` name using the configured catalog."""
    return f"{catalog}.{sch}.{tbl}"


In [None]:
# Unity Catalog bootstrap (you may need permissions to create catalogs/schemas)
# Statements run in order: create the catalog, switch to it, then create
# each of the four schemas (ref, bronze, silver, gold) if missing.
bootstrap_statements = [
    f"CREATE CATALOG IF NOT EXISTS {catalog}",
    f"USE CATALOG {catalog}",
]
bootstrap_statements.extend(
    f"CREATE SCHEMA IF NOT EXISTS {catalog}.{schema_name}"
    for schema_name in (schema_ref, schema_bronze, schema_silver, schema_gold)
)
for statement in bootstrap_statements:
    spark.sql(statement)


## bronze.shift_raw
Shift columns: (shift_id, facility_id, start_ts, end_ts, required_procedure_code, required_count, ingested_at)


In [None]:
from pyspark.sql.types import *
import random
import uuid
from datetime import datetime, timedelta

# Seed the module-level RNG so the random draws below are repeatable for a given SEED.
random.seed(SEED)
# Fixed anchor for the schedule: day 0 starts at 2026-01-01 08:00.
base_ts = datetime(2026, 1, 1, 8, 0, 0)

# Read the reference keys in a sorted order. collect() on an unordered read has
# no guaranteed row order, and both the loop order and random.choice() depend on
# list positions, so without the sort the same SEED could yield different data.
facility_ids = [
    r["facility_id"]
    for r in spark.read.table(fq(schema_ref, "facility"))
    .select("facility_id")
    .orderBy("facility_id")
    .collect()
]
procedure_codes = [
    r["procedure_code"]
    for r in spark.read.table(fq(schema_ref, "procedure"))
    .select("procedure_code")
    .orderBy("procedure_code")
    .collect()
]

# One row per (day, facility, 12-hour block). shift_i is a running counter that
# feeds both the deterministic shift_id and the ingested_at offset.
shift_rows = []
shift_i = 0
for day in range(DAYS_SCHEDULE):
    day_start = base_ts + timedelta(days=day)
    for fac in facility_ids:
        # Two back-to-back 12-hour shifts per facility per day.
        for block in [0, 1]:
            start_ts = day_start + timedelta(hours=block * 12)
            end_ts = start_ts + timedelta(hours=12)
            required_procedure_code = random.choice(procedure_codes)
            required_count = random.randint(1, 3)
            # uuid5 is a name-based UUID, so the id is stable for a given SEED and index.
            shift_id = str(uuid.uuid5(uuid.NAMESPACE_URL, f"shift-{SEED}-{shift_i}"))
            # Synthetic ingestion time: one second after the previous row's.
            ingested_at = base_ts + timedelta(seconds=shift_i)
            shift_rows.append((shift_id, fac, start_ts, end_ts, required_procedure_code, required_count, ingested_at))
            shift_i += 1

# Explicit schema; every column is declared non-nullable.
shift_schema = StructType([
    StructField("shift_id", StringType(), False),
    StructField("facility_id", StringType(), False),
    StructField("start_ts", TimestampType(), False),
    StructField("end_ts", TimestampType(), False),
    StructField("required_procedure_code", StringType(), False),
    StructField("required_count", IntegerType(), False),
    StructField("ingested_at", TimestampType(), False)
])

# Replace the bronze table (data and schema) on every run.
shift_df = spark.createDataFrame(shift_rows, shift_schema)
shift_df.write.format("delta").mode("overwrite").option("overwriteSchema", "true").saveAsTable(fq(schema_bronze, "shift_raw"))


## bronze.assignment_raw
Assignment columns: (assignment_id, shift_id, provider_id, assigned_ts, assignment_status, source_system, ingested_at)


In [None]:
# Read provider ids in a sorted order. collect() on an unordered read has no
# guaranteed row order, and random.sample() picks by list position, so without
# the sort the same SEED could assign different providers on each run.
# NOTE(review): ids are not de-duplicated; if provider_raw can contain repeated
# provider_id values, one shift may receive the same provider twice — confirm
# whether that is intended for the bronze layer.
providers = [
    r["provider_id"]
    for r in spark.read.table(fq(schema_bronze, "provider_raw"))
    .select("provider_id")
    .orderBy("provider_id")
    .collect()
]

# The RNG is not re-seeded in this cell; draws continue from its current state.
# as_i is a running counter that feeds the deterministic assignment_id and the
# ingested_at offset.
assignment_rows = []
as_i = 0
for s in shift_rows:
    shift_id, facility_id, start_ts, end_ts, proc_code, required_count, _ing = s
    # Intentional gaps: assign between 0 and required_count providers
    fill_n = random.randint(0, required_count)
    # Cap at the number of available providers so sample() cannot raise.
    chosen = random.sample(providers, k=min(fill_n, len(providers)))
    for pid in chosen:
        # uuid5 is a name-based UUID, so the id is stable for a given SEED and index.
        assignment_id = str(uuid.uuid5(uuid.NAMESPACE_URL, f"assign-{SEED}-{as_i}"))
        # Assigned 1 to 48 hours before the shift starts.
        assigned_ts = start_ts - timedelta(hours=random.randint(1, 48))
        # "ASSIGNED" is listed twice, so it is drawn twice as often as "CANCELED".
        assignment_status = random.choice(["ASSIGNED", "ASSIGNED", "CANCELED"])
        source_system = random.choice(["SCHED_SYS_A", "SCHED_SYS_B"])
        # Synthetic ingestion time: one hour before shift start plus as_i seconds.
        ingested_at = start_ts - timedelta(hours=1) + timedelta(seconds=as_i)
        assignment_rows.append((assignment_id, shift_id, pid, assigned_ts, assignment_status, source_system, ingested_at))
        as_i += 1

# Explicit schema; every column is declared non-nullable.
assign_schema = StructType([
    StructField("assignment_id", StringType(), False),
    StructField("shift_id", StringType(), False),
    StructField("provider_id", StringType(), False),
    StructField("assigned_ts", TimestampType(), False),
    StructField("assignment_status", StringType(), False),
    StructField("source_system", StringType(), False),
    StructField("ingested_at", TimestampType(), False)
])

# Replace the bronze table (data and schema) on every run.
assign_df = spark.createDataFrame(assignment_rows, assign_schema)
assign_df.write.format("delta").mode("overwrite").option("overwriteSchema", "true").saveAsTable(fq(schema_bronze, "assignment_raw"))


## Validate
Counts for both tables.


In [None]:
# Print "<fully qualified table name>: <row count>" for each bronze table,
# with the count formatted using thousands separators.
for table_name in ("shift_raw", "assignment_raw"):
    qualified_name = fq(schema_bronze, table_name)
    row_count = spark.read.table(qualified_name).count()
    print(f"{qualified_name}: {row_count:,}")
