# Pipeline: Bronze to Silver

## Data Source

- **Catalog Location:**  `workspace.hospital_bronze`
- **Dimensional Tables:** `cities, departments, diagnoses, procedures, providers, insurance`
- **Format:** Delta Lake Table


## Destination

- **Catalog Location:** `workspace.hospital_silver`
- **Dimensional Tables:** `cities, departments, diagnoses, procedures, providers, insurance`
- **Format:** Delta Lake Table 

In [0]:
# Notebook parameter selecting which dimensional table this run processes.
dbutils.widgets.text("dimensional_table", "")
# Strip surrounding whitespace so a padded value (e.g. " cities") still matches
# the per-table branches below instead of silently matching none of them.
dimensional_table = dbutils.widgets.get("dimensional_table").strip()

# Fail fast with an explicit exception: `assert` statements are removed when
# Python runs with -O, which would let an empty parameter slip through.
if not dimensional_table:
  raise ValueError("Missing required parameter: dimensional_table")

In [0]:
# Catalog and schema names used to build fully qualified table names.
catalog_name = "workspace"
schema_bronze = "hospital_bronze"
schema_silver = "hospital_silver"
schema_gold = "hospital_gold"

# Data source path (S3).
data_source = "s3://buckethospitaldata/data_batching/"

# Streaming checkpoint directory for the silver write; one sub-directory per
# dimensional table so each table's stream tracks its own progress.
checkpoint_location = (
  "s3://buckethospitaldata/pipeline_checkpoints/data_batching/_checkpoints/silver/"
  f"{dimensional_table}"
)

## Cities

In [0]:
if dimensional_table == 'cities':
  # Read the bronze table as a stream so only new rows are processed per run.
  df = spark.readStream.table(f"{catalog_name}.{schema_bronze}.{dimensional_table}")

  # Append to the silver table. The target is qualified with the catalog, like
  # the read above, so the write does not depend on the session's current catalog.
  (
    df.writeStream
    .format("delta")
    .option("checkpointLocation", checkpoint_location)
    .option("mergeSchema", "true")  # let the Delta sink evolve its schema when new columns appear
    .outputMode("append")
    # Process everything currently available, then stop; replaces the
    # deprecated once=True trigger.
    .trigger(availableNow=True)
    .table(f"{catalog_name}.{schema_silver}.{dimensional_table}")
  )


## Insurance

In [0]:
if dimensional_table == 'insurance':
  # Read the bronze table as a stream so only new rows are processed per run.
  df = spark.readStream.table(f"{catalog_name}.{schema_bronze}.{dimensional_table}")

  # Append to the silver table. The target is qualified with the catalog, like
  # the read above, so the write does not depend on the session's current catalog.
  (
    df.writeStream
    .format("delta")
    .option("checkpointLocation", checkpoint_location)
    .option("mergeSchema", "true")  # let the Delta sink evolve its schema when new columns appear
    .outputMode("append")
    # Process everything currently available, then stop; replaces the
    # deprecated once=True trigger.
    .trigger(availableNow=True)
    .table(f"{catalog_name}.{schema_silver}.{dimensional_table}")
  )


## Departments

In [0]:
if dimensional_table == 'departments':
  # Read the bronze table as a stream so only new rows are processed per run.
  df = spark.readStream.table(f"{catalog_name}.{schema_bronze}.{dimensional_table}")

  # Append to the silver table. The target is qualified with the catalog, like
  # the read above, so the write does not depend on the session's current catalog.
  (
    df.writeStream
    .format("delta")
    .option("checkpointLocation", checkpoint_location)
    .option("mergeSchema", "true")  # let the Delta sink evolve its schema when new columns appear
    .outputMode("append")
    # Process everything currently available, then stop; replaces the
    # deprecated once=True trigger.
    .trigger(availableNow=True)
    .table(f"{catalog_name}.{schema_silver}.{dimensional_table}")
  )


## Procedures

In [0]:
if dimensional_table == 'procedures':
  # Read the bronze table as a stream so only new rows are processed per run.
  df_procedures = spark.readStream.table(f"{catalog_name}.{schema_bronze}.{dimensional_table}")

  # Append to the silver table. The target is qualified with the catalog, like
  # the read above, so the write does not depend on the session's current catalog.
  (
    df_procedures.writeStream
    .format("delta")
    .option("checkpointLocation", checkpoint_location)
    .option("mergeSchema", "true")  # let the Delta sink evolve its schema when new columns appear
    .outputMode("append")
    # Process everything currently available, then stop; replaces the
    # deprecated once=True trigger.
    .trigger(availableNow=True)
    .table(f"{catalog_name}.{schema_silver}.{dimensional_table}")
  )


## Diagnoses

In [0]:
if dimensional_table == 'diagnoses':
  # Read the bronze table as a stream so only new rows are processed per run.
  df_diagnoses = spark.readStream.table(f"{catalog_name}.{schema_bronze}.{dimensional_table}")

  # Append to the silver table. The target is qualified with the catalog, like
  # the read above, so the write does not depend on the session's current catalog.
  (
    df_diagnoses.writeStream
    .format("delta")
    .option("checkpointLocation", checkpoint_location)
    .option("mergeSchema", "true")  # let the Delta sink evolve its schema when new columns appear
    .outputMode("append")
    # Process everything currently available, then stop; replaces the
    # deprecated once=True trigger.
    .trigger(availableNow=True)
    .table(f"{catalog_name}.{schema_silver}.{dimensional_table}")
  )


## Providers

In [0]:
if dimensional_table == 'providers':
  # Read the bronze table as a stream so only new rows are processed per run.
  df_providers = spark.readStream.table(f"{catalog_name}.{schema_bronze}.{dimensional_table}")

  # Append to the silver table. The target is qualified with the catalog, like
  # the read above, so the write does not depend on the session's current catalog.
  (
    df_providers.writeStream
    .format("delta")
    .option("checkpointLocation", checkpoint_location)
    .option("mergeSchema", "true")  # let the Delta sink evolve its schema when new columns appear
    .outputMode("append")
    # Process everything currently available, then stop; replaces the
    # deprecated once=True trigger.
    .trigger(availableNow=True)
    .table(f"{catalog_name}.{schema_silver}.{dimensional_table}")
  )
