### Example Exploratory Notebook

Use this notebook to explore, in your preferred programming language, the data generated by the pipeline.

**Note**: This notebook is not executed as part of the pipeline.

In [0]:
# Notebook to generate DLT schema
import pandas as pd
from pyspark.sql.functions import lit
from pyspark.sql import SparkSession

# getOrCreate() reuses the active SparkSession when one already exists,
# and only builds a new one (named "SchemaGenerator") otherwise.
spark = SparkSession.builder.appName("SchemaGenerator").getOrCreate()


def _quote_identifier(name):
    """Return ``name`` as a backtick-delimited Spark SQL identifier.

    Embedded backticks are doubled so that a key containing "`" cannot
    terminate the identifier early and corrupt the generated statement.
    """
    return "`" + str(name).replace("`", "``") + "`"


# Read the keys from the tracking table
keys_df = spark.sql("SELECT key_name FROM silver_device_patient_key_tracking")

# distinct() gives no ordering guarantee, so sort on the driver to make the
# generated column order stable from run to run. NULL keys are skipped:
# they would otherwise be rendered as a literal `None` column.
key_names = sorted(
    row.key_name
    for row in keys_df.distinct().collect()
    if row.key_name is not None
)

# With no keys the templates below would produce invalid SQL
# (an empty STRUCT<> and a dangling comma), so fail loudly instead.
if not key_names:
    raise ValueError(
        "No key names found in silver_device_patient_key_tracking; "
        "cannot generate a schema from an empty key list."
    )

# Quote every key once and reuse the result in all three generated snippets.
quoted_keys = [_quote_identifier(key) for key in key_names]

# Generate the STRUCT schema for from_json
schema_parts = [f"{quoted}: STRING" for quoted in quoted_keys]
struct_schema = f"STRUCT<{', '.join(schema_parts)}>"

print("--- Generated STRUCT Schema ---")
print(struct_schema)

# Generate the SELECT statement for the CDC flow.
# The "..." parts are placeholders to be filled in by hand.
select_clauses = [f"json_data.{quoted}" for quoted in quoted_keys]
select_statement = f"SELECT PatientID, Shard, ... , {', '.join(select_clauses)} FROM parsed WHERE ..."

print("\n--- Generated SELECT Statement ---")
print(select_statement)

# You can also generate the CREATE TABLE statement similarly.
# The "(...)" parts are placeholders to be filled in by hand.
table_schema_parts = [f"{quoted} STRING" for quoted in quoted_keys]
table_schema_statement = f"CREATE OR REFRESH STREAMING TABLE silver_device_patient_scd2 (...) {', '.join(table_schema_parts)} (...)"

print("\n--- Generated CREATE TABLE Statement ---")
print(table_schema_statement)
