%md
## Pipeline: Silver to Gold - Table Patients

## Data Source

- **Catalog Location:**  `workspace.hospital_silver.patients`
- **Format:** Delta Lake Table


## Destination

- **Catalog Location:** `workspace.hospital_gold.dim_patients`
- **Format:** Delta Lake Table 

## Configurations

In [0]:
# Silver-layer table to read; also the last segment of the checkpoint path.
entity = "patients"

# Gold-layer table that receives the data.
gold_table = "dim_patients"

In [0]:
# Databricks catalog and the per-layer schema names inside it
catalog_name = "workspace"
schema_bronze = "hospital_bronze"
schema_silver = "hospital_silver"
schema_gold = "hospital_gold"

# S3 prefix of the streaming data source
data_source = "s3://buckethospitaldata/data_streaming/"

# Checkpoint location for the gold streaming write, one directory per entity
# (kept in the same S3 bucket as the data source)
checkpoint_location = (
    "s3://buckethospitaldata/pipeline_checkpoints/data_streaming/"
    f"_checkpoints/gold/{entity}"
)

In [0]:
def read_from_silver(dimensional_table):
    """Open a streaming read on a silver-layer table.

    Args:
        dimensional_table: Table name inside ``{catalog_name}.{schema_silver}``.

    Returns:
        The streaming DataFrame produced by ``spark.readStream.table``.
    """
    silver_table = f"{catalog_name}.{schema_silver}.{dimensional_table}"
    return spark.readStream.table(silver_table)



def write_to_gold(dataframe, checkpoint_location: str, gold_table: str):
    """Append a streaming DataFrame to an existing gold-layer Delta table.

    Only columns present in both ``dataframe`` and the target table are
    written, selected in the target table's column order.

    Args:
        dataframe: Streaming DataFrame to write.
        checkpoint_location: Path passed as the stream's ``checkpointLocation``.
        gold_table: Table name inside ``{catalog_name}.{schema_gold}``. The
            table must already exist, because its schema is read first.

    Returns:
        The query handle returned by ``toTable``, so callers can wait on or
        inspect the stream. Callers that ignore the return value are
        unaffected.

    Raises:
        ValueError: If ``dataframe`` shares no column with the target table.
    """
    target_table = f"{catalog_name}.{schema_gold}.{gold_table}"

    # Walk the target schema instead of intersecting two sets: a set
    # intersection has no defined order, so the selected column order could
    # differ from run to run.
    source_columns = set(dataframe.columns)
    common_columns = [
        field.name
        for field in spark.table(target_table).schema
        if field.name in source_columns
    ]
    if not common_columns:
        raise ValueError(
            f"No columns in common between the input DataFrame and {target_table}"
        )

    # NOTE(review): trigger(once=True) is reported as deprecated in newer
    # Spark releases in favour of availableNow=True; kept as-is to preserve
    # the existing batching behaviour. Confirm against the runtime in use.
    return (
        dataframe.select(common_columns)
        .writeStream
        .format("delta")
        .outputMode("append")
        .option("checkpointLocation", checkpoint_location)
        .trigger(once=True)
        .toTable(target_table)
    )

## Read Data from Silver Layer

In [0]:
print(f"Preparing streaming write for '{entity}' table...")

# Open the silver stream, then discard the rescued-data column before loading.
# NOTE(review): drop() silently ignores a missing column, so confirm that the
# silver column is really named 'rescued_data'.
df_patients = read_from_silver(entity)
df_patients = df_patients.drop('rescued_data')

## Write data to Gold Layer

In [0]:
# Start the streaming append into the gold table; this call is not awaited
# here, hence the "initiated" wording below.
write_to_gold(
    dataframe=df_patients,
    checkpoint_location=checkpoint_location,
    gold_table=gold_table,
)

print(f"Write to {gold_table} initiated.")