In [36]:
# Import JSON and Path libraries to open and explore the data inside JSON file
import json
from pathlib import Path

# Directory holding the FHIR bundle JSON files (resolved against the current working directory)
data_dir = Path("../data/fhir/")

# Pick the first JSON file in sorted order so the same bundle is loaded on every run;
# glob() on its own yields files in an arbitrary, filesystem-dependent order.
json_files = sorted(data_dir.glob("*.json"))
if not json_files:
    # Fail with a clear message instead of a bare StopIteration when the directory is empty or missing
    raise FileNotFoundError(f"No .json files found in {data_dir.resolve()}")
first_file = json_files[0]

# Pass the encoding explicitly so the read does not depend on the platform's
# default locale encoding (JSON interchange files are UTF-8).
with open(first_file, "r", encoding="utf-8") as f:
    data = json.load(f)

# Show the top-level keys of the loaded bundle
print(data.keys())


dict_keys(['resourceType', 'type', 'entry'])


In [37]:
# The bundle's resources live in its "entry" list; report how many there are
bundle_entries = data["entry"]
print("Number of resources:", len(bundle_entries))

# Keep the first entry for later inspection and show which keys it carries
first_entry = bundle_entries[0]
print(first_entry.keys())


Number of resources: 442
dict_keys(['fullUrl', 'resource', 'request'])


In [38]:
# Background: every entry in a FHIR bundle wraps one "resource", i.e. a single
# piece of healthcare information. Its resourceType says what kind of data it is,
# which in turn tells us which keys and fields are worth exploring. Examples:
#   "Patient"              - demographic info about the patient
#   "Encounter"            - a visit or interaction at a healthcare facility
#   "Observation"          - a clinical measurement, lab result, or vital sign
#   "Procedure"            - a medical procedure performed
#   "MedicationRequest"    - prescriptions or medication orders
#   "ExplanationOfBenefit" - billing/insurance info for a visit

# Read the resourceType of the first entry's resource and display it
first_resource = first_entry["resource"]
resource_type = first_resource["resourceType"]
print(resource_type)


Patient


In [39]:
# List the field names available on the first entry's FHIR resource
first_resource_keys = first_entry["resource"].keys()
print(first_resource_keys)

dict_keys(['resourceType', 'id', 'meta', 'text', 'extension', 'identifier', 'name', 'telecom', 'gender', 'birthDate', 'address', 'maritalStatus', 'multipleBirthBoolean', 'communication'])


In [40]:
# Read a single field from the first resource; "gender" is used as the example
# here and is only meaningful when that resource is a Patient
first_gender = first_entry["resource"]["gender"]
print(first_gender)

male


In [41]:
# Summarize the basic patient details held in the first entry's resource
patient = first_entry["resource"]

# (label, resource key) pairs, printed in this order.
# .get() returns None for a key the resource does not contain.
summary_fields = (
    ("ID:", "id"),
    ("Name:", "name"),
    ("Gender:", "gender"),
    ("BirthDate:", "birthDate"),
    ("Deceased:", "deceasedDateTime"),
    ("Address:", "address"),
)
for label, key in summary_fields:
    print(label, patient.get(key))


ID: 92fb7efc-5cfd-f8d3-927b-42f8ee099531
Name: [{'use': 'official', 'family': "O'Reilly797", 'given': ['Ahmed109']}]
Gender: male
BirthDate: 2013-06-12
Deceased: None
Address: [{'extension': [{'url': 'http://hl7.org/fhir/StructureDefinition/geolocation', 'extension': [{'url': 'latitude', 'valueDecimal': 42.25416489604414}, {'url': 'longitude', 'valueDecimal': -71.70092510023379}]}], 'line': ['857 Kling Arcade'], 'city': 'Worcester', 'state': 'MA', 'postalCode': '01604', 'country': 'US'}]


In [15]:
# Tally the resources in this patient record by type:
# collect the 'resourceType' of every entry, then let Counter summarize the list
from collections import Counter

resource_types = []
for bundle_entry in data["entry"]:
    resource_types.append(bundle_entry["resource"]["resourceType"])

type_counts = Counter(resource_types)
print(type_counts)


Counter({'Observation': 216, 'Immunization': 35, 'Claim': 34, 'DiagnosticReport': 32, 'Encounter': 27, 'DocumentReference': 27, 'ExplanationOfBenefit': 27, 'Procedure': 13, 'Condition': 10, 'MedicationRequest': 7, 'CareTeam': 5, 'CarePlan': 5, 'Patient': 1, 'AllergyIntolerance': 1, 'ImagingStudy': 1, 'Provenance': 1})


In [42]:
# Extract Patient resources from the bundle into a list of flat dictionaries.
# Every field is read defensively: a Patient with no name, no given name, or a
# partial address yields None for that column instead of raising
# KeyError/IndexError (these elements are optional in FHIR).
patient_info = []

for entry in data["entry"]:
    resource = entry["resource"]
    if resource["resourceType"] != "Patient":
        continue

    # "name" and "address" are lists; take the first item, or an empty dict
    # when the list is missing or empty
    name = (resource.get("name") or [{}])[0]
    address = (resource.get("address") or [{}])[0]
    # "given" is itself a list of given names; keep only the first one
    given_names = name.get("given") or [None]

    patient_info.append({
        "id": resource.get("id"),
        "gender": resource.get("gender"),
        "birthDate": resource.get("birthDate"),
        "deceased": resource.get("deceasedDateTime"),
        "family_name": name.get("family"),
        "given_name": given_names[0],
        "city": address.get("city"),
        "state": address.get("state"),
        "country": address.get("country"),
    })

# Display the extracted records as the cell output
patient_info


[{'id': '92fb7efc-5cfd-f8d3-927b-42f8ee099531',
  'gender': 'male',
  'birthDate': '2013-06-12',
  'deceased': None,
  'family_name': "O'Reilly797",
  'given_name': 'Ahmed109',
  'city': 'Worcester',
  'state': 'MA',
  'country': 'US'}]

In [43]:
# Read the human-readable marital status of the first entry's resource.
# "maritalStatus" and its "text" are optional in FHIR, so fall back to None
# rather than raising KeyError when either is absent.
first_entry = data["entry"][0]        # get the first entry
resource = first_entry["resource"]    # get the resource inside that entry
marital_status = (resource.get("maritalStatus") or {}).get("text")  # text of maritalStatus, or None
print(marital_status)


Never Married


In [44]:
# Build a pandas DataFrame from the extracted patient records and preview its first rows
import pandas as pd

patient_info_df = pd.DataFrame(data=patient_info)
df_preview = patient_info_df.head()
print(df_preview)

                                     id gender   birthDate deceased  \
0  92fb7efc-5cfd-f8d3-927b-42f8ee099531   male  2013-06-12     None   

   family_name given_name       city state country  
0  O'Reilly797   Ahmed109  Worcester    MA      US  
