
## 🟢 SILVER LAYER – PART 1

🥈 STEP 1: Understand FHIR Patient & Encounter (Beginner)
Patient

Appears once per file

Has:

id

gender

birthDate

maritalStatus

deceasedDateTime

Encounter

Appears many times per patient

Has:

id

subject.reference → patient

period.start / period.end

type

status

### 🧩 INPUT TO SILVER (WHAT WE WILL USE)

We will NOT read raw JSON again.
We will ONLY read from Bronze VIEW:

healthcare.fhir_healthcare_analytics_bronze.fhir_entry_view


This view already gives us:

One row per FHIR resource

Metadata (source_file, ingest_time)

### 🥈 STEP 2: Load Bronze exploded VIEW


In [0]:
# Silver input: the Bronze view that exposes one row per FHIR resource.
silver_source_df = spark.read.table(
    "healthcare.fhir_healthcare_analytics_bronze.fhir_entry_view"
)

# 🥈 STEP 3: Build SILVER PATIENT table

Why this is easy

One Patient resource per file

No joins needed yet

🧾 Extract Patient data


#### Use for a detailed patient dashboard:

from pyspark.sql.functions import col, get_json_object

patient_df = silver_source_df \
    .filter(col("entry.resource.resourceType") == "Patient") \
    .select(
        # Core identifiers
        col("entry.resource.id").alias("patient_id"),

        # Demographics
        col("entry.resource.gender").alias("gender"),
        col("entry.resource.birthDate").alias("birth_date"),
        col("entry.resource.deceasedDateTime").alias("deceased_datetime"),
        col("entry.resource.maritalStatus.text").alias("marital_status"),

        # Address → STRING → use get_json_object
        get_json_object(col("entry.resource.address"), "$[0].city").alias("city"),
        get_json_object(col("entry.resource.address"), "$[0].state").alias("state"),
        get_json_object(col("entry.resource.address"), "$[0].country").alias("country"),
        get_json_object(col("entry.resource.address"), "$[0].postalCode").alias("postal_code"),

        # Language → ARRAY<STRUCT> → direct access
        col("entry.resource.communication")[0]["language"]["text"]
            .alias("preferred_language"),

        col("entry.resource.communication")[0]["language"]["coding"][0]["code"]
            .alias("language_code"),

        # Phone → ARRAY<STRUCT> → direct access
        col("entry.resource.telecom")[0]["value"].alias("phone_number"),

        # Metadata
        col("source_file"),
        col("ingest_time")
    )


#### Use for small data:
from pyspark.sql.functions import col

patient_df = silver_source_df \
    .filter(col("entry.resource.resourceType") == "Patient") \
    .select(
        col("entry.resource.id").alias("patient_id"),
        col("entry.resource.gender").alias("gender"),
        col("entry.resource.birthDate").alias("birth_date"),
        col("entry.resource.deceasedDateTime").alias("deceased_datetime"),
        col("entry.resource.maritalStatus.text").alias("marital_status"),
        col("source_file"),
        col("ingest_time")
    )


   




In [0]:
from pyspark.sql.functions import col, get_json_object

# Flatten every Patient resource into one row: identifier, demographics,
# first address, first language, first telecom value, plus Bronze lineage.
patient_df = (
    silver_source_df
    .where(col("entry.resource.resourceType") == "Patient")
    .select(
        # Core identifier
        col("entry.resource.id").alias("patient_id"),

        # Demographics
        col("entry.resource.gender").alias("gender"),
        col("entry.resource.birthDate").alias("birth_date"),
        col("entry.resource.deceasedDateTime").alias("deceased_datetime"),
        col("entry.resource.maritalStatus.text").alias("marital_status"),

        # Address is read through JSON paths; only element [0] is used
        get_json_object(col("entry.resource.address"), "$[0].city").alias("city"),
        get_json_object(col("entry.resource.address"), "$[0].state").alias("state"),
        get_json_object(col("entry.resource.address"), "$[0].country").alias("country"),
        get_json_object(col("entry.resource.address"), "$[0].postalCode").alias("postal_code"),

        # Language: first communication entry, read as nested fields
        col("entry.resource.communication")
        .getItem(0)
        .getField("language")
        .getField("text")
        .alias("preferred_language"),

        col("entry.resource.communication")
        .getItem(0)
        .getField("language")
        .getField("coding")
        .getItem(0)
        .getField("code")
        .alias("language_code"),

        # Phone: value of the first telecom entry
        col("entry.resource.telecom").getItem(0).getField("value").alias("phone_number"),

        # Lineage columns carried over from Bronze
        col("source_file"),
        col("ingest_time"),
    )
)


🧼 Deduplicate Patients

In [0]:
# Keep a single row per patient_id.
patient_df = patient_df.drop_duplicates(["patient_id"])


💾 Write Silver Patient table (Unity Catalog)


In [0]:
# Save the patient rows as a Delta table, replacing whatever the table held.
patient_df.write.saveAsTable(
    "healthcare.fhir_healthcare_analytics_silver.patient",
    format="delta",
    mode="overwrite",
)


✅ Validate Silver Patient table

In [0]:
%sql
SELECT COUNT(*) 
FROM healthcare.fhir_healthcare_analytics_silver.patient;


In [0]:
%sql

select * from healthcare.fhir_healthcare_analytics_silver.patient

### 🥈 STEP 4: Build SILVER ENCOUNTER table
Key concept (VERY IMPORTANT)

FHIR stores:

Encounter.subject.reference = "urn:uuid:<patient_id>"   (the form used in these bundles; the generic FHIR form is "Patient/<patient_id>")


We must extract patient_id from this string.

### 🧾 Extract Encounter data

In [0]:
from pyspark.sql.functions import col, regexp_extract, when


def _uuid_ref_id(reference_path):
    """Return the id captured from a ``urn:uuid:<id>`` reference column.

    ``regexp_extract`` yields an empty string (not NULL) when the pattern
    does not match; that empty string is mapped to NULL here so unresolved
    references cannot pass for a real key in joins or distinct counts.
    """
    extracted = regexp_extract(col(reference_path), "urn:uuid:(.*)", 1)
    return when(extracted != "", extracted)


# One row per Encounter resource, linked to its patient.
encounter_df = silver_source_df \
    .filter(col("entry.resource.resourceType") == "Encounter") \
    .select(
        # Encounter ID
        col("entry.resource.id").alias("encounter_id"),

        # Owning patient (NULL when the reference is not a urn:uuid one)
        _uuid_ref_id("entry.resource.subject.reference").alias("patient_id"),

        # Timing
        col("entry.resource.period.start").alias("admit_time"),
        col("entry.resource.period.end").alias("discharge_time"),

        # Status
        col("entry.resource.status").alias("status"),

        # Encounter type (kept raw; parse later if needed)
        col("entry.resource.type").alias("encounter_type"),

        # Metadata
        col("source_file"),
        col("ingest_time")
    )


🧼 Deduplicate Encounters


In [0]:
# Keep a single row per encounter_id.
encounter_df = encounter_df.drop_duplicates(["encounter_id"])

💾 Write Silver Encounter table


In [0]:
# Save the encounter rows as a Delta table, replacing whatever the table held.
encounter_df.write.saveAsTable(
    "healthcare.fhir_healthcare_analytics_silver.encounter",
    format="delta",
    mode="overwrite",
)

✅ Validate Silver Encounter table


You should see many more rows than patients (correct).

In [0]:
%sql
SELECT COUNT(*) 
FROM healthcare.fhir_healthcare_analytics_silver.encounter;

In [0]:
%sql 

SELECT * FROM healthcare.fhir_healthcare_analytics_silver.encounter;

In [0]:
%sql
SELECT encounter_id, encounter_type
FROM healthcare.fhir_healthcare_analytics_silver.encounter
LIMIT 10;


🧠 WHAT YOU HAVE ACHIEVED (IMPORTANT)

✔ Raw → Bronze → Silver pipeline

✔ One row = one real-world entity

✔ FHIR references resolved

✔ Unity Catalog–governed tables

✔ Medallion architecture followed

### 🧠 BEFORE WE START (VERY IMPORTANT)

How FHIR links data (simple view)

Patient

   ‚Üì

Encounter

   ‚Üì

Condition (Diagnosis)

   ‚Üì

Observation

   ‚Üì

Claim / EOB

FHIR uses references as strings, not joins:

"reference": "Encounter/abc123"


👉 Silver’s job = extract IDs from references and normalize

## 🥈 SILVER LAYER – PART 2

INPUT (same as before)

In [0]:
# Silver input: the Bronze view that exposes one row per FHIR resource.
silver_source_df = spark.read.table(
    "healthcare.fhir_healthcare_analytics_bronze.fhir_entry_view"
)


### 🥈 STEP 1: SILVER CONDITION (Diagnosis)
What is a Condition?

Diagnosis like Diabetes, Hypertension, Infection

Many Conditions per Encounter

References:

subject.reference → Patient

encounter.reference → Encounter

### 🧾 Extract Condition data (FHIR-safe)


In [0]:
from pyspark.sql.functions import col, regexp_extract, get_json_object, when


def _uuid_ref_id(reference_path):
    """Return the id captured from a ``urn:uuid:<id>`` reference column.

    ``regexp_extract`` yields an empty string (not NULL) when the pattern
    does not match; that empty string is mapped to NULL here so unresolved
    references cannot pass for a real key in joins or distinct counts.
    """
    extracted = regexp_extract(col(reference_path), "urn:uuid:(.*)", 1)
    return when(extracted != "", extracted)


# One row per Condition resource, linked to its patient and encounter.
condition_df = silver_source_df \
    .filter(col("entry.resource.resourceType") == "Condition") \
    .select(
        # IDs
        col("entry.resource.id").alias("condition_id"),

        # References (NULL when not a urn:uuid reference)
        _uuid_ref_id("entry.resource.subject.reference").alias("patient_id"),
        _uuid_ref_id("entry.resource.encounter.reference").alias("encounter_id"),

        # Diagnosis text, read from the code column via a JSON path
        get_json_object(
            col("entry.resource.code"),
            "$.text"
        ).alias("diagnosis"),

        # Clinical status: code of the first coding entry
        col("entry.resource.clinicalStatus.coding")[0]["code"]
            .alias("clinical_status"),

        # Dates
        col("entry.resource.onsetDateTime").alias("onset_time"),
        col("entry.resource.recordedDate").alias("recorded_date"),

        # Metadata
        col("source_file"),
        col("ingest_time")
    )


🧼 Deduplicate Condition


In [0]:
# Keep a single row per condition_id.
condition_df = condition_df.drop_duplicates(["condition_id"])

💾 Write Silver Condition table


In [0]:
# Save the condition rows as a Delta table, replacing whatever the table held.
condition_df.write.saveAsTable(
    "healthcare.fhir_healthcare_analytics_silver.condition",
    format="delta",
    mode="overwrite",
)

In [0]:
%sql
SELECT COUNT(*) 
FROM healthcare.fhir_healthcare_analytics_silver.condition;


In [0]:
%sql
SELECT * FROM healthcare.fhir_healthcare_analytics_silver.condition;

### 🥈 STEP 2: SILVER OBSERVATION
What is an Observation?

Vitals, lab results, measurements

Many Observations per Encounter

Examples:

Blood pressure

Heart rate

Lab test values

### 🧾 Extract Observation data (FHIR-safe)


In [0]:
from pyspark.sql.functions import col, regexp_extract, when


def _uuid_ref_id(reference_path):
    """Return the id captured from a ``urn:uuid:<id>`` reference column.

    ``regexp_extract`` yields an empty string (not NULL) when the pattern
    does not match; that empty string is mapped to NULL here so unresolved
    references cannot pass for a real key in joins or distinct counts.
    """
    extracted = regexp_extract(col(reference_path), "urn:uuid:(.*)", 1)
    return when(extracted != "", extracted)


# One row per Observation resource, linked to its patient and encounter.
observation_df = silver_source_df \
    .filter(col("entry.resource.resourceType") == "Observation") \
    .select(
        # IDs
        col("entry.resource.id").alias("observation_id"),

        # References (NULL when not a urn:uuid reference)
        _uuid_ref_id("entry.resource.subject.reference").alias("patient_id"),
        _uuid_ref_id("entry.resource.encounter.reference").alias("encounter_id"),

        # Observation code, kept raw and unparsed
        col("entry.resource.code").alias("observation_code_raw"),

        # Measured value and unit from valueQuantity
        col("entry.resource.valueQuantity.value")
            .alias("value"),

        col("entry.resource.valueQuantity.unit")
            .alias("unit"),

        # Time
        col("entry.resource.effectiveDateTime")
            .alias("observation_time"),

        # Metadata
        col("source_file"),
        col("ingest_time")
    )


In [0]:
# Keep a single row per observation_id.
observation_df = observation_df.drop_duplicates(["observation_id"])


💾 Write Silver Observation table


In [0]:
# Save the observation rows as a Delta table, replacing whatever the table held.
observation_df.write.saveAsTable(
    "healthcare.fhir_healthcare_analytics_silver.observation",
    format="delta",
    mode="overwrite",
)

In [0]:
%sql
SELECT COUNT(*) 
FROM healthcare.fhir_healthcare_analytics_silver.observation;


In [0]:
%sql
select * from healthcare.fhir_healthcare_analytics_silver.observation;

🥈 STEP 3: SILVER CLAIM / EXPLANATION OF BENEFIT

FHIR uses:

Claim

ExplanationOfBenefit

Synthea usually generates ExplanationOfBenefit.

### 🧾 Extract EOB data


In [0]:
from pyspark.sql.functions import col, regexp_extract, get_json_object, when


def _uuid_ref_id(reference_path):
    """Return the id captured from a ``urn:uuid:<id>`` reference column.

    ``regexp_extract`` yields an empty string (not NULL) when the pattern
    does not match; that empty string is mapped to NULL here so unresolved
    references cannot pass for a real key in joins or distinct counts.
    """
    extracted = regexp_extract(col(reference_path), "urn:uuid:(.*)", 1)
    return when(extracted != "", extracted)


# One row per ExplanationOfBenefit resource.
eob_df = silver_source_df \
    .filter(col("entry.resource.resourceType") == "ExplanationOfBenefit") \
    .select(
        # ID
        col("entry.resource.id").alias("eob_id"),

        # Patient reference (NULL when not a urn:uuid reference)
        _uuid_ref_id("entry.resource.patient.reference").alias("patient_id"),

        # Encounter reference (NULL when not a urn:uuid reference).
        # NOTE(review): FHIR R4 ExplanationOfBenefit has no top-level
        # `encounter` element (encounters are referenced from
        # item[].encounter) - confirm this column is actually populated
        # for EOB rows.
        _uuid_ref_id("entry.resource.encounter.reference").alias("encounter_id"),

        # Claim type: display of the first coding, read via a JSON path
        get_json_object(
            col("entry.resource.type"),
            "$.coding[0].display"
        ).alias("claim_type"),

        # Total amount: value of the first `total` entry, cast to double
        get_json_object(
            col("entry.resource.total"),
            "$[0].amount.value"
        ).cast("double").alias("total_amount"),

        # Status
        col("entry.resource.status").alias("claim_status"),

        # Metadata
        col("source_file"),
        col("ingest_time")
    )


🧼 Deduplicate EOB


In [0]:
# Keep a single row per eob_id.
eob_df = eob_df.drop_duplicates(["eob_id"])

💾 Write Silver EOB table


In [0]:
# Save the EOB rows as a Delta table, replacing whatever the table held.
eob_df.write.saveAsTable(
    "healthcare.fhir_healthcare_analytics_silver.explanation_of_benefit",
    format="delta",
    mode="overwrite",
)

✅ Validate EOB


In [0]:
%sql
SELECT COUNT(*) 
FROM healthcare.fhir_healthcare_analytics_silver.explanation_of_benefit;

In [0]:
%sql
select * from healthcare.fhir_healthcare_analytics_silver.explanation_of_benefit;

We will build FOUR Silver tables in this exact order:

1️⃣ Procedure (treatments performed)

2️⃣ MedicationRequest (prescriptions)

3️⃣ Immunization (preventive care)

4️⃣ DiagnosticReport (lab & imaging reports)

All tables will:

Read from Bronze exploded VIEW

Resolve Patient & Encounter references

Be Unity Catalog compliant

Be FHIR-safe (no assumptions about schema)

### 🧠 INPUT (same as earlier – do NOT change)

In [0]:
# Silver input: the Bronze view that exposes one row per FHIR resource.
silver_source_df = spark.read.table(
    "healthcare.fhir_healthcare_analytics_bronze.fhir_entry_view"
)


### 🥈 STEP 1: SILVER PROCEDURE TABLE
Why this matters

Shows what treatment was performed

Used for treatment trends dashboard

### 🧾 Extract Procedure data


In [0]:
from pyspark.sql.functions import col, regexp_extract, get_json_object, when


def _uuid_ref_id(reference_path):
    """Return the id captured from a ``urn:uuid:<id>`` reference column.

    ``regexp_extract`` yields an empty string (not NULL) when the pattern
    does not match; that empty string is mapped to NULL here so unresolved
    references cannot pass for a real key in joins or distinct counts.
    """
    extracted = regexp_extract(col(reference_path), "urn:uuid:(.*)", 1)
    return when(extracted != "", extracted)


# One row per Procedure resource, linked to its patient and encounter.
procedure_df = silver_source_df \
    .filter(col("entry.resource.resourceType") == "Procedure") \
    .select(
        # IDs
        col("entry.resource.id").alias("procedure_id"),

        # References (NULL when not a urn:uuid reference)
        _uuid_ref_id("entry.resource.subject.reference").alias("patient_id"),
        _uuid_ref_id("entry.resource.encounter.reference").alias("encounter_id"),

        # Procedure name, read from the code column via a JSON path
        get_json_object(
            col("entry.resource.code"),
            "$.text"
        ).alias("procedure_name"),

        # Status
        col("entry.resource.status").alias("procedure_status"),

        # Timing.
        # NOTE(review): occurrenceDateTime is the FHIR R5 element name; FHIR R4
        # Procedure uses performedDateTime / performedPeriod. If the bundles
        # are R4, performed_time may be NULL on every row - confirm against
        # the written table.
        col("entry.resource.occurrenceDateTime").alias("performed_time"),

        # Metadata
        col("source_file"),
        col("ingest_time")
    )


🧼 Deduplicate


In [0]:
# Keep a single row per procedure_id.
procedure_df = procedure_df.drop_duplicates(["procedure_id"])

💾 Write Silver Procedure table


In [0]:
# Save the procedure rows as a Delta table, replacing whatever the table held.
procedure_df.write.saveAsTable(
    "healthcare.fhir_healthcare_analytics_silver.procedure",
    format="delta",
    mode="overwrite",
)


✅ Validate


In [0]:
%sql
SELECT COUNT(*) 
FROM healthcare.fhir_healthcare_analytics_silver.procedure;

In [0]:
%sql
select *
from healthcare.fhir_healthcare_analytics_silver.procedure
limit 10

### 🥈 STEP 2: SILVER MEDICATION REQUEST TABLE
Why this matters

Shows what doctors prescribed

Enables medication trend analysis

### 🧾 Extract MedicationRequest data (FHIR-safe)

In [0]:
from pyspark.sql.functions import col, regexp_extract, when


def _uuid_ref_id(reference_path):
    """Return the id captured from a ``urn:uuid:<id>`` reference column.

    ``regexp_extract`` yields an empty string (not NULL) when the pattern
    does not match; that empty string is mapped to NULL here so unresolved
    references cannot pass for a real key in joins or distinct counts.
    """
    extracted = regexp_extract(col(reference_path), "urn:uuid:(.*)", 1)
    return when(extracted != "", extracted)


# One row per MedicationRequest resource, linked to its patient and encounter.
medication_request_df = silver_source_df \
    .filter(col("entry.resource.resourceType") == "MedicationRequest") \
    .select(
        # ID
        col("entry.resource.id").alias("medication_request_id"),

        # References (NULL when not a urn:uuid reference)
        _uuid_ref_id("entry.resource.subject.reference").alias("patient_id"),
        _uuid_ref_id("entry.resource.encounter.reference").alias("encounter_id"),

        # Medication name from medicationCodeableConcept.text
        col("entry.resource.medicationCodeableConcept.text")
            .alias("medication_name"),

        # Status & timing
        col("entry.resource.status").alias("status"),
        col("entry.resource.authoredOn").alias("prescribed_date"),

        # Metadata
        col("source_file"),
        col("ingest_time")
    )


🧼 Deduplicate


In [0]:
# Keep a single row per medication_request_id.
medication_request_df = medication_request_df.drop_duplicates(["medication_request_id"])

💾 Write Silver MedicationRequest table


In [0]:
# Save the medication-request rows as a Delta table, replacing whatever the
# table held.
medication_request_df.write.saveAsTable(
    "healthcare.fhir_healthcare_analytics_silver.medication_request",
    format="delta",
    mode="overwrite",
)

In [0]:
%sql
SELECT COUNT(*) 
FROM healthcare.fhir_healthcare_analytics_silver.medication_request;


In [0]:
%sql
SELECT *
FROM healthcare.fhir_healthcare_analytics_silver.medication_request;
    


🥈 STEP 3: SILVER IMMUNIZATION TABLE
Why this matters

Preventive healthcare

Public health dashboards

### 🧾 Extract Immunization data


In [0]:
from pyspark.sql.functions import col, regexp_extract, when


def _uuid_ref_id(reference_path):
    """Return the id captured from a ``urn:uuid:<id>`` reference column.

    ``regexp_extract`` yields an empty string (not NULL) when the pattern
    does not match; that empty string is mapped to NULL here so unresolved
    references cannot pass for a real key in joins or distinct counts.
    """
    extracted = regexp_extract(col(reference_path), "urn:uuid:(.*)", 1)
    return when(extracted != "", extracted)


# One row per Immunization resource, linked to its patient and encounter.
immunization_df = silver_source_df \
    .filter(col("entry.resource.resourceType") == "Immunization") \
    .select(
        # ID
        col("entry.resource.id").alias("immunization_id"),

        # References (NULL when not a urn:uuid reference)
        _uuid_ref_id("entry.resource.patient.reference").alias("patient_id"),
        _uuid_ref_id("entry.resource.encounter.reference").alias("encounter_id"),

        # Vaccine info
        col("entry.resource.vaccineCode.text").alias("vaccine_name"),
        col("entry.resource.status").alias("status"),
        col("entry.resource.occurrenceDateTime").alias("vaccination_date"),

        # Metadata
        col("source_file"),
        col("ingest_time")
    )


In [0]:
# Keep a single row per immunization_id.
immunization_df = immunization_df.drop_duplicates(["immunization_id"])


In [0]:
# Save the immunization rows as a Delta table, replacing whatever the table held.
immunization_df.write.saveAsTable(
    "healthcare.fhir_healthcare_analytics_silver.immunization",
    format="delta",
    mode="overwrite",
)


In [0]:
%sql
SELECT COUNT(*) 
FROM healthcare.fhir_healthcare_analytics_silver.immunization;


In [0]:
%sql
SELECT *
FROM healthcare.fhir_healthcare_analytics_silver.immunization;

### 🟢 Silver – DiagnosticReport
What we extract

report_id

patient_id

encounter_id

report_type

report_status

effective_time

🥈 STEP 4: SILVER DIAGNOSTIC REPORT TABLE
Why this matters

Groups observations

Lab & imaging analytics

### 🧾 Extract DiagnosticReport data


In [0]:
from pyspark.sql.functions import col, regexp_extract, get_json_object, when


def _uuid_ref_id(reference_path):
    """Return the id captured from a ``urn:uuid:<id>`` reference column.

    ``regexp_extract`` yields an empty string (not NULL) when the pattern
    does not match; that empty string is mapped to NULL here so unresolved
    references cannot pass for a real key in joins or distinct counts.
    """
    extracted = regexp_extract(col(reference_path), "urn:uuid:(.*)", 1)
    return when(extracted != "", extracted)


# One row per DiagnosticReport resource, linked to its patient and encounter.
diagnostic_report_df = silver_source_df \
    .filter(col("entry.resource.resourceType") == "DiagnosticReport") \
    .select(
        # ID
        col("entry.resource.id").alias("diagnostic_report_id"),

        # References (NULL when not a urn:uuid reference)
        _uuid_ref_id("entry.resource.subject.reference").alias("patient_id"),
        _uuid_ref_id("entry.resource.encounter.reference").alias("encounter_id"),

        # Report name, read from the code column via a JSON path
        get_json_object(
            col("entry.resource.code"),
            "$.text"
        ).alias("report_name"),

        # Status & timing
        col("entry.resource.status").alias("status"),
        col("entry.resource.effectiveDateTime").alias("report_time"),

        # Metadata
        col("source_file"),
        col("ingest_time")
    )


In [0]:
# Keep a single row per diagnostic_report_id.
diagnostic_report_df = diagnostic_report_df.drop_duplicates(["diagnostic_report_id"])


💾 Write Silver DiagnosticReport table


In [0]:
# Save the diagnostic-report rows as a Delta table, replacing whatever the
# table held.
diagnostic_report_df.write.saveAsTable(
    "healthcare.fhir_healthcare_analytics_silver.diagnostic_report",
    format="delta",
    mode="overwrite",
)

✅ Validate


In [0]:
%sql
SELECT COUNT(*) 
FROM healthcare.fhir_healthcare_analytics_silver.diagnostic_report;

In [0]:
%sql
SELECT * FROM healthcare.fhir_healthcare_analytics_silver.diagnostic_report;

You currently have these Silver tables built (from the steps above):

### ✅ Silver tables available

patient

encounter

condition

observation

explanation_of_benefit

procedure

medication_request

immunization

diagnostic_report

👉 That’s 9 FHIR resource types, which is more than enough for a strong POC.


Now we will update your GOLD view to include ALL of them. Child tables are attached with LEFT JOIN, so an encounter is kept even when it has no matching rows in a child table.

### 🥈 STEP 4: FINAL LINK VALIDATION (MOST IMPORTANT)
Prove everything is connected


✔ References resolved
✔ Healthcare timeline visible
✔ Silver layer complete

## 🥇 UPDATED GOLD VIEW
(All 9 FHIR Resource Types Connected)

### 🧱 FINAL: GOLD VIEW WITH ALL RESOURCE TYPES (9 TABLES)

You now have these Silver tables:

patient

encounter

condition

observation

explanation_of_benefit

procedure

medication_request

immunization

diagnostic_report

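Below is a Gold view joining ALL of them. Note: every child table is joined to the encounter independently, so an encounter with several conditions, observations, etc. produces one row per combination. De-duplicate or pre-aggregate before computing counts or totals from this view.

### 🥇 FINAL GOLD VIEW (CORRECT & COMPLETE)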

In [0]:
%sql
-- Gold view: one wide row per patient/encounter combined with every matching
-- row from each Silver child table.
--
-- Row semantics to be aware of:
--   * patient -> encounter is an INNER JOIN: patients without a matching
--     encounter row, and encounters whose patient_id matches no patient,
--     do not appear in this view.
--   * Each child table is LEFT JOINed to the encounter independently on
--     encounter_id. When an encounter has several rows in more than one
--     child table, the output holds one row per combination, so row counts
--     multiply and aggregates over this view (e.g. SUM(total_amount))
--     over-count.
--     NOTE(review): consider pre-aggregating each child table per
--     encounter_id, or building one view per child table, if this view
--     feeds totals.
CREATE OR REPLACE VIEW healthcare.fhir_healthcare_analytics_gold.patient_encounter_summary AS
SELECT
  -- Core
  p.patient_id,
  e.encounter_id,

  -- Encounter timeline
  e.admit_time,
  e.discharge_time,
  e.status AS encounter_status,
  e.encounter_type,

  -- Clinical
  c.diagnosis,
  o.observation_code_raw     AS observation_name,
  o.value                    AS observation_value,
  o.unit                     AS observation_unit,

  -- Procedures
  pr.procedure_name,
  pr.procedure_status,
  pr.performed_time,

  -- Medications
  mr.medication_name,
  mr.status                  AS medication_status,
  mr.prescribed_date,

  -- Immunization
  im.vaccine_name,
  im.status                  AS immunization_status,
  im.vaccination_date,

  -- Diagnostic reports
  dr.report_name,
  dr.status                  AS report_status,
  dr.report_time,

  -- Financial
  eob.claim_type,
  eob.total_amount,
  eob.claim_status

FROM healthcare.fhir_healthcare_analytics_silver.patient p

-- Inner join: only patients that have at least one encounter are kept.
JOIN healthcare.fhir_healthcare_analytics_silver.encounter e
  ON p.patient_id = e.patient_id

-- Child tables: LEFT JOIN keeps the encounter when a child has no rows.
LEFT JOIN healthcare.fhir_healthcare_analytics_silver.condition c
  ON e.encounter_id = c.encounter_id

LEFT JOIN healthcare.fhir_healthcare_analytics_silver.observation o
  ON e.encounter_id = o.encounter_id

LEFT JOIN healthcare.fhir_healthcare_analytics_silver.procedure pr
  ON e.encounter_id = pr.encounter_id

LEFT JOIN healthcare.fhir_healthcare_analytics_silver.medication_request mr
  ON e.encounter_id = mr.encounter_id

LEFT JOIN healthcare.fhir_healthcare_analytics_silver.immunization im
  ON e.encounter_id = im.encounter_id

LEFT JOIN healthcare.fhir_healthcare_analytics_silver.diagnostic_report dr
  ON e.encounter_id = dr.encounter_id

LEFT JOIN healthcare.fhir_healthcare_analytics_silver.explanation_of_benefit eob
  ON e.encounter_id = eob.encounter_id;


In [0]:
%sql
SELECT *
FROM healthcare.fhir_healthcare_analytics_gold.patient_encounter_summary
LIMIT 20;
