In [0]:
%sql
-- List the columns (name, data type, comment) of bronze_events_patient_data.
-- NOTE(review): this table is addressed in the `default` schema, while the
-- other cells in this notebook query `demo_schema` - confirm that is intended.
DESCRIBE TABLE ankurnayyar_cat1.default.bronze_events_patient_data

In [0]:
import pandas as pd
from datetime import datetime

# ---------------------------------------------------------
# Step 1 - Build one sample record matching the Bronze schema
# ---------------------------------------------------------
# JSON document stored in the "D" column, kept as a single literal so the
# text written to the file is exactly what is typed here.
# Observations from the literal itself:
#   * its "id" is a UUID, whereas the top-level "ID" column below is "23";
#   * it carries two keys ("abbott_newfield", "zipCode123") that have no
#     top-level column - presumably there to exercise schema evolution
#     downstream; TODO confirm.
payload_json = '{"address1":"Main Ave 123","address2":"","businessId":"7af92634b046e8bb","city":"Phoenix","country":"US","created":1694438408,"createdBy":"c4af8d09-50a5-11ee-8a8f-26f091458896","id":"ed9921e2-50a5-11ee-8205-02223a2aacf2","name":"9T5FA1","phoneNumber":"5551232255","private":false,"programs":null,"records":null,"shard":2115,"state":"TX","updated":1694438408,"updatedBy":"c4af8d09-50a5-11ee-8a8f-26f091458896","zipCode":"50995","abbott_newfield":"AnkurNayyar", "zipCode123":"50995"}'

# Every column value is a plain string; "Address2", "V" and "P" are empty.
sample_record = {
    "ID": "23",
    "Shard": "2115",
    "Private": "false",
    "Name": "9T5FA1",
    "Address1": "Main Ave 123",
    "Address2": "",
    "City": "Phoenix",
    "State": "TX",
    "ZipCode": "50995",
    "Country": "US",
    "PhoneNumber": "5551232255",
    "BusinessID": "7af92634b046e8bb",
    "Created": "1694438408",
    "CreatedBy": "c4af8d09-50a5-11ee-8a8f-26f091458896",
    "Updated": "1694438408",
    "UpdatedBy": "c4af8d09-50a5-11ee-8a8f-26f091458896",
    "V": "",
    "D": payload_json,
    "P": "",
}
data = [sample_record]

# Single-row DataFrame built from the record.
df = pd.DataFrame(data)

# ---------------------------------------------------------
# Step 2 - Destination of the TSV file
# ---------------------------------------------------------
output_path = "/Volumes/ankurnayyar_cat1/demo_schema/jsonschema/sample_data.tsv"

# ---------------------------------------------------------
# Step 3 - Write the frame as tab-separated text
# ---------------------------------------------------------
# The header row is written (pandas default) and the index is omitted.
# With pandas' default quoting, the "D" field is wrapped in double quotes and
# its embedded double quotes are doubled, because it contains the quote char.
df.to_csv(output_path, sep="\t", index=False)
print(f"TSV file written to: {output_path}")


In [0]:
%sql
-- Fetch every row of silver_events_patient_data_scd2 whose ID is either '23'
-- or the given upper-case UUID.
-- String literals are single-quoted: double-quoted text is parsed as an
-- identifier when ANSI double-quoted identifiers are enabled
-- (spark.sql.ansi.doubleQuotedIdentifiers), which would break this filter.
-- NOTE(review): the UUID is upper-case here, while the sample JSON payload in
-- this notebook carries the same value in lower-case. String equality is
-- case-sensitive by default - confirm the Silver table stores IDs upper-cased.
SELECT *
FROM ankurnayyar_cat1.demo_schema.silver_events_patient_data_scd2
WHERE ID IN ('23', 'ED9921E2-50A5-11EE-8205-02223A2AACF2')

In [0]:
%sql
-- Return the raw event log for the pipeline identified by this ID.
-- The ID is a string literal, so it is single-quoted: double-quoted text is
-- parsed as an identifier when ANSI double-quoted identifiers are enabled
-- (spark.sql.ansi.doubleQuotedIdentifiers).
SELECT * FROM event_log('6480d4bf-ca26-4932-a0c9-fb472810dc3c');



In [0]:
%sql
-- Pair each flow_definition event with the next one (by timestamp) for the
-- same output dataset, so two consecutive schema snapshots sit side by side.
-- A row is returned for every consecutive pair, whether or not the schema
-- text actually differs between the two events.
-- String literals are single-quoted: double-quoted text is parsed as an
-- identifier when ANSI double-quoted identifiers are enabled
-- (spark.sql.ansi.doubleQuotedIdentifiers).
WITH schema_history AS (
  SELECT
    timestamp,
    details:flow_definition.output_dataset AS output_table,
    details:flow_definition.schema AS schema_json,
    -- Position of the event within its output dataset, oldest first.
    -- Events sharing a timestamp are numbered in arbitrary order.
    ROW_NUMBER() OVER (PARTITION BY details:flow_definition.output_dataset ORDER BY timestamp) AS rn
  FROM event_log('6480d4bf-ca26-4932-a0c9-fb472810dc3c')
  WHERE event_type = 'flow_definition'
)
SELECT
  s1.output_table,
  s1.schema_json AS before_schema,
  s2.schema_json AS after_schema,
  s1.timestamp AS before_ts,
  s2.timestamp AS after_ts
FROM schema_history s1
JOIN schema_history s2
  ON s1.output_table = s2.output_table
 AND s2.rn = s1.rn + 1  -- s2 is the event immediately after s1
WHERE s1.output_table = 'ankurnayyar_cat1.demo_schema.silver_events_patient_data'
ORDER BY s2.timestamp DESC;  -- most recent transition first

In [0]:
%sql
-- Compare the earliest and the latest flow_definition schema recorded for a
-- table and label each column of the latest schema as NEW or MATCHED.
-- String literals are single-quoted: double-quoted text is parsed as an
-- identifier when ANSI double-quoted identifiers are enabled
-- (spark.sql.ansi.doubleQuotedIdentifiers).
-- NOTE(review): the result is driven by the latest schema (LEFT JOIN from
-- after_cols), so a column present only in the earliest schema is not
-- reported, and a column whose data_type changed still shows as MATCHED.
-- Confirm that is acceptable for this report.
WITH schema_history AS (
  SELECT
    timestamp,
    details:flow_definition.output_dataset AS output_table,
    details:flow_definition.schema AS schema_json,
    -- rn_asc = 1 marks the oldest event per output dataset
    ROW_NUMBER() OVER (
      PARTITION BY details:flow_definition.output_dataset
      ORDER BY timestamp
    ) AS rn_asc,
    -- rn_desc = 1 marks the newest event per output dataset
    ROW_NUMBER() OVER (
      PARTITION BY details:flow_definition.output_dataset
      ORDER BY timestamp DESC
    ) AS rn_desc
  FROM event_log('6480d4bf-ca26-4932-a0c9-fb472810dc3c')
  WHERE event_type = 'flow_definition'
),
-- One row per output dataset: oldest schema next to newest schema.
-- With a single event, both sides are that same event.
first_last AS (
  SELECT
    f.output_table,
    f.timestamp AS first_ts,
    f.schema_json AS before_schema,
    l.timestamp AS last_ts,
    l.schema_json AS after_schema
  FROM schema_history f
  JOIN schema_history l
    ON f.output_table = l.output_table
  WHERE f.rn_asc = 1
    AND l.rn_desc = 1
),
-- One row per column of the oldest schema; the JSON text is parsed as an
-- array of {name, path, data_type} structs and exploded.
before_cols AS (
  SELECT
    output_table,
    explode(from_json(before_schema, 'array<struct<name:string,path:array<string>,data_type:string>>')) AS col
  FROM first_last
),
-- One row per column of the newest schema, parsed the same way.
after_cols AS (
  SELECT
    output_table,
    explode(from_json(after_schema, 'array<struct<name:string,path:array<string>,data_type:string>>')) AS col
  FROM first_last
)
-- Columns of the newest schema, matched by name against the oldest schema
SELECT
  a.output_table,
  a.col.name AS column_name,
  a.col.data_type AS column_type,
  CASE
    WHEN b.col.name IS NULL THEN 'NEW'       -- column in after, not in before
    ELSE 'MATCHED'                           -- column exists in both
  END AS status,
  fl.first_ts,
  fl.last_ts
FROM after_cols a
LEFT JOIN before_cols b
  ON a.output_table = b.output_table
 AND a.col.name = b.col.name
JOIN first_last fl
  ON a.output_table = fl.output_table
WHERE a.output_table = 'ankurnayyar_cat1.demo_schema.silver_events_patient_data'
ORDER BY fl.last_ts DESC, a.output_table;
