A lot of Python developers have some affinity with data science and are used to work with Pandas DataFrames (or similar). But turning FHIR Resources in FHIR DataFrames is hard. You could setup a Spark environment or start using BigQuery, because they have support for hierarchical structures. But there is one tool that can help when you quickly need to do some analytics localy: **pola.rs**

In [20]:
# First downooad some Synthea generated data
! ./download_sample_bundle.sh

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 81.1M  100 81.1M    0     0   829k      0  0:01:40  0:01:40 --:--:--  984k  0      0 --:--:--  0:00:05 --:--:--     0 0   285k      0  0:04:51  0:00:14  0:04:37  881k
r4.bundle.zip: OK
Archive:  r4.bundle.zip
replace fhir/Robert854_Botsford977_148ad83c-4dbc-4cb6-9334-44e6886f1e42.json? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
from pathlib import Path
from tqdm import tqdm
import r4

# The data is stored in the fhir folder and contains a bundle for each patient
# Now extract all the patients and conditions from the Bundles

NUM_PATIENTS = 10
conditions = []
patients = []
for i,bundle_path in tqdm(enumerate(Path("./fhir").glob("*.json")), desc="Parse Bundle"):
    bundle = r4.Bundle.parse_file(bundle_path)
    for entry in tqdm(bundle.entry, desc="Parse entry"):
        if entry.resource is not None and entry.resource.resourceType == "Condition":
            conditions.append(entry.resource)
        if entry.resource is not None and entry.resource.resourceType == "Patient":
            patients.append(entry.resource)
    if i >= NUM_PATIENTS:
        break


Parse entry: 100%|██████████| 303/303 [00:00<00:00, 1206907.99it/s]

Parse entry: 100%|██████████| 272/272 [00:00<00:00, 928275.58it/s]

Parse entry: 100%|██████████| 370/370 [00:00<00:00, 1230683.97it/s]

Parse entry: 100%|██████████| 122/122 [00:00<00:00, 904072.59it/s]

Parse entry: 100%|██████████| 116/116 [00:00<00:00, 757849.32it/s]

Parse entry: 100%|██████████| 359/359 [00:00<00:00, 1071711.84it/s]

Parse entry: 100%|██████████| 328/328 [00:00<00:00, 1100585.37it/s]

Parse entry: 100%|██████████| 2280/2280 [00:00<00:00, 1332452.71it/s]

Parse entry: 100%|██████████| 137/137 [00:00<00:00, 729212.75it/s]

Parse entry: 100%|██████████| 283/283 [00:00<00:00, 1348850.04it/s]

Parse entry: 100%|██████████| 335/335 [00:00<00:00, 1012314.01it/s]
Parse Bundle: 10it [01:07,  6.74s/it]


In [13]:
# Here we use the polars library to convert the list of conditions and patients to DataFrames
import polars as pl

df_conditions = pl.DataFrame([c.dict(exclude_none=True) for c in conditions])
df_patients = pl.DataFrame([p.dict(exclude_none=True) for p in patients])

In [14]:
# You can see that pola.rs supports nested data structures through the use of Arrow under the hood
df_patients.select("id", "name", "birthDate")

id,name,birthDate
str,list[struct[6]],str
"""766a0f64-cfd6-…","[{""official"",[""Robert854""],""Botsford977"",[""Mrs.""],[],[]}, {""maiden"",[""Robert854""],""Ward668"",[""Mrs.""],[],[]}]","""1946-04-26"""
"""c749327f-e726-…","[{""official"",[""Will178""],""Lang846"",[],[],[]}]","""2011-03-11"""
"""338e0f93-2fb1-…","[{""official"",[""Maxwell782""],""Koepp521"",[""Mr.""],[],[]}]","""1957-09-14"""
"""5e6177fd-cc45-…","[{""official"",[""Gianna370""],""McClure239"",[],[],[]}]","""2017-11-09"""
"""73d58836-0439-…","[{""official"",[""Shelia548""],""Sipes176"",[],[],[]}]","""2017-12-27"""
"""4ee9ca6e-abda-…","[{""official"",[""Hertha832""],""Schuster709"",[""Mrs.""],[],[]}, {""maiden"",[""Hertha832""],""Conn188"",[""Mrs.""],[],[]}]","""1966-08-30"""
"""62ac0f0c-6877-…","[{""official"",[""Faustino767""],""Johnson679"",[""Mr.""],[],[]}]","""1946-03-29"""
"""496593fe-85f9-…","[{""official"",[""Harrison106""],""Schuster709"",[""Mr.""],[],[]}]","""1911-04-08"""
"""d4155021-030f-…","[{""official"",[""María Elena653""],""Serrato62"",[""Mrs.""],[],[]}, {""maiden"",[""María Elena653""],""Carrasquillo418"",[""Mrs.""],[],[]}]","""1966-04-24"""
"""15ff0f7a-48a3-…","[{""official"",[""Lucy743""],""Kris249"",[],[],[]}]","""2009-11-20"""


In [15]:
df_conditions.select("id", "code", "onsetDateTime", "abatementDateTime", subject=pl.col("subject").struct["reference"],)

id,code,onsetDateTime,abatementDateTime,subject
str,struct[3],str,str,str
"""e35163ad-e206-…","{""Hypertension"",[{""http://snomed.info/sct"",""Hypertension"",[],""59621000""}],[]}","""1964-06-19T08:…",,"""urn:uuid:766a0…"
"""cd1ad77b-3505-…","{""Prediabetes"",[{""http://snomed.info/sct"",""Prediabetes"",[],""15777000""}],[]}","""1980-07-11T08:…",,"""urn:uuid:766a0…"
"""c862baa3-7348-…","{""Anemia (disorder)"",[{""http://snomed.info/sct"",""Anemia (disorder)"",[],""271737000""}],[]}","""1980-07-11T08:…",,"""urn:uuid:766a0…"
"""98d39fd3-4cd0-…","{""Miscarriage in first trimester"",[{""http://snomed.info/sct"",""Miscarriage in first trimester"",[],""19169002""}],[]}","""1989-11-03T07:…",,"""urn:uuid:766a0…"
"""e0387a43-6421-…","{""Osteoarthritis of hip"",[{""http://snomed.info/sct"",""Osteoarthritis of hip"",[],""239872002""}],[]}","""2000-04-12T08:…",,"""urn:uuid:766a0…"
"""798116e1-b470-…","{""Viral sinusitis (disorder)"",[{""http://snomed.info/sct"",""Viral sinusitis (disorder)"",[],""444814009""}],[]}","""2012-05-08T08:…","""2012-05-29T08:…","""urn:uuid:766a0…"
"""c002ef60-9a78-…","{""Acute bacterial sinusitis (disorder)"",[{""http://snomed.info/sct"",""Acute bacterial sinusitis (disorder)"",[],""75498004""}],[]}","""2017-11-04T08:…","""2017-12-30T07:…","""urn:uuid:766a0…"
"""91e43578-e138-…","{""Chronic sinusitis (disorder)"",[{""http://snomed.info/sct"",""Chronic sinusitis (disorder)"",[],""40055000""}],[]}","""2017-12-23T07:…",,"""urn:uuid:766a0…"
"""78c6f610-99d6-…","{""Sprain of ankle"",[{""http://snomed.info/sct"",""Sprain of ankle"",[],""44465007""}],[]}","""2012-04-04T04:…","""2012-04-25T04:…","""urn:uuid:c7493…"
"""0050c92a-bd97-…","{""Fracture of clavicle"",[{""http://snomed.info/sct"",""Fracture of clavicle"",[],""58150001""}],[]}","""2013-05-20T04:…","""2013-08-18T04:…","""urn:uuid:c7493…"


In [None]:
# When doing analytics you want to flatten the data before you start
# This is easy to do with polars though it does require some knowledge of the FHIR spec
df_flattened = df_conditions.select(
        subject=pl.col("subject").struct["reference"].str.strip("urn:uuid:"),
        code=pl.col("code").struct["coding"].list.eval(pl.element().struct["system"]+"|"+pl.element().struct["code"]),
        onsetDateTime=pl.col("onsetDateTime"),
        abatementDateTime=pl.col("abatementDateTime"),
        encounter=pl.col("encounter").struct["reference"].str.strip("urn:uuid:"),
        clinicalStatus=pl.col("clinicalStatus").struct["coding"].list.first().struct["code"],
        verificationStatus=pl.col("verificationStatus").struct["coding"].list.first().struct["code"],
)
df_flattened

subject,code,onsetDateTime,abatementDateTime,encounter,clinicalStatus,verificationStatus
str,list[str],str,str,str,str,str
"""766a0f64-cfd6-…","[""http://snomed.info/sct|59621000""]","""1964-06-19T08:…",,"""0c8e8aa-5659-4…","""active""","""confirmed"""
"""766a0f64-cfd6-…","[""http://snomed.info/sct|15777000""]","""1980-07-11T08:…",,"""bb2c950-bc65-4…","""active""","""confirmed"""
"""766a0f64-cfd6-…","[""http://snomed.info/sct|271737000""]","""1980-07-11T08:…",,"""bb2c950-bc65-4…","""active""","""confirmed"""
"""766a0f64-cfd6-…","[""http://snomed.info/sct|19169002""]","""1989-11-03T07:…",,"""3952e337-456b-…","""active""","""confirmed"""
"""766a0f64-cfd6-…","[""http://snomed.info/sct|239872002""]","""2000-04-12T08:…",,"""ebef64c1-af40-…","""active""","""confirmed"""
"""766a0f64-cfd6-…","[""http://snomed.info/sct|444814009""]","""2012-05-08T08:…","""2012-05-29T08:…","""f4340cd6-5567-…","""resolved""","""confirmed"""
"""766a0f64-cfd6-…","[""http://snomed.info/sct|75498004""]","""2017-11-04T08:…","""2017-12-30T07:…","""46815444-f6ef-…","""resolved""","""confirmed"""
"""766a0f64-cfd6-…","[""http://snomed.info/sct|40055000""]","""2017-12-23T07:…",,"""cf2f10ba-2786-…","""active""","""confirmed"""
"""c749327f-e726-…","[""http://snomed.info/sct|44465007""]","""2012-04-04T04:…","""2012-04-25T04:…","""5d517575-0ade-…","""resolved""","""confirmed"""
"""c749327f-e726-…","[""http://snomed.info/sct|58150001""]","""2013-05-20T04:…","""2013-08-18T04:…","""5dfe632f-c6b7-…","""resolved""","""confirmed"""


In [None]:
# Now we can join the flattened conditions with the patients
df_flattened_with_subject = df_flattened.join(
                    df_patients.select("id", "birthDate", name=pl.col("name").list.eval(pl.element().struct["given"].list.first()+" "+pl.element().struct["family"])),
                   left_on=pl.col("subject"), 
                   right_on=pl.col("id"), 
                   how="inner")
df_flattened_with_subject

subject,code,onsetDateTime,abatementDateTime,encounter,clinicalStatus,verificationStatus,birthDate,name
str,list[str],datetime[μs],datetime[μs],str,str,str,date,list[str]
"""766a0f64-cfd6-…","[""http://snomed.info/sct|59621000""]",1964-06-19 12:31:19,,"""0c8e8aa-5659-4…","""active""","""confirmed""",1946-04-26,"[""Robert854 Botsford977"", ""Robert854 Ward668""]"
"""766a0f64-cfd6-…","[""http://snomed.info/sct|15777000""]",1980-07-11 12:31:19,,"""bb2c950-bc65-4…","""active""","""confirmed""",1946-04-26,"[""Robert854 Botsford977"", ""Robert854 Ward668""]"
"""766a0f64-cfd6-…","[""http://snomed.info/sct|271737000""]",1980-07-11 12:31:19,,"""bb2c950-bc65-4…","""active""","""confirmed""",1946-04-26,"[""Robert854 Botsford977"", ""Robert854 Ward668""]"
"""766a0f64-cfd6-…","[""http://snomed.info/sct|19169002""]",1989-11-03 12:31:19,,"""3952e337-456b-…","""active""","""confirmed""",1946-04-26,"[""Robert854 Botsford977"", ""Robert854 Ward668""]"
"""766a0f64-cfd6-…","[""http://snomed.info/sct|239872002""]",2000-04-12 12:31:19,,"""ebef64c1-af40-…","""active""","""confirmed""",1946-04-26,"[""Robert854 Botsford977"", ""Robert854 Ward668""]"
"""766a0f64-cfd6-…","[""http://snomed.info/sct|444814009""]",2012-05-08 12:31:19,2012-05-29 12:31:19,"""f4340cd6-5567-…","""resolved""","""confirmed""",1946-04-26,"[""Robert854 Botsford977"", ""Robert854 Ward668""]"
"""766a0f64-cfd6-…","[""http://snomed.info/sct|75498004""]",2017-11-04 12:31:19,2017-12-30 12:31:19,"""46815444-f6ef-…","""resolved""","""confirmed""",1946-04-26,"[""Robert854 Botsford977"", ""Robert854 Ward668""]"
"""766a0f64-cfd6-…","[""http://snomed.info/sct|40055000""]",2017-12-23 12:31:19,,"""cf2f10ba-2786-…","""active""","""confirmed""",1946-04-26,"[""Robert854 Botsford977"", ""Robert854 Ward668""]"
"""c749327f-e726-…","[""http://snomed.info/sct|44465007""]",2012-04-04 08:25:10,2012-04-25 08:25:10,"""5d517575-0ade-…","""resolved""","""confirmed""",2011-03-11,"[""Will178 Lang846""]"
"""c749327f-e726-…","[""http://snomed.info/sct|58150001""]",2013-05-20 08:25:10,2013-08-18 08:25:10,"""5dfe632f-c6b7-…","""resolved""","""confirmed""",2011-03-11,"[""Will178 Lang846""]"


In [None]:
df_flattened_with_subject.get_column("code").explode().value_counts().to_dict()

coding,counts
str,u32
"""http://snomed.…",1
"""http://snomed.…",2
"""http://snomed.…",1
"""http://snomed.…",3
"""http://snomed.…",8
"""http://snomed.…",1
"""http://snomed.…",2
"""http://snomed.…",3
"""http://snomed.…",1
"""http://snomed.…",1


In [86]:
df_conditions_with_code_tokens.filter(pl.col("coding").list.contains("http://snomed.info/sct|65363002"))

resourceType,id,note,stage,subject,category,bodySite,onsetDateTime,evidence,contained,extension,encounter,identifier,recordedDate,clinicalStatus,modifierExtension,verificationStatus,code,abatementDateTime,coding
str,str,list[null],list[null],struct[2],list[null],list[null],datetime[μs],list[null],list[null],list[null],struct[2],list[null],datetime[μs],struct[2],list[null],struct[2],struct[3],datetime[μs],list[str]
"""Condition""","""3536a438-17e8-…",[],[],"{[],""urn:uuid:c749327f-e726-4100-9b54-e5159a8ec184""}",[],[],2013-06-28 08:25:10,[],[],[],"{[],""urn:uuid:e458dc49-bcd7-4854-80c5-483f36848f75""}",[],2013-06-28 08:25:10,"{[{""http://terminology.hl7.org/CodeSystem/condition-clinical"",[],""resolved""}],[]}",[],"{[{""http://terminology.hl7.org/CodeSystem/condition-ver-status"",[],""confirmed""}],[]}","{""Otitis media"",[{""http://snomed.info/sct"",""Otitis media"",[],""65363002""}],[]}",2013-08-16 08:25:10,"[""http://snomed.info/sct|65363002""]"
"""Condition""","""9638a7c4-f30f-…",[],[],"{[],""urn:uuid:c749327f-e726-4100-9b54-e5159a8ec184""}",[],[],2016-07-31 08:25:10,[],[],[],"{[],""urn:uuid:6cf72b78-08ab-41bf-9895-fe2131edac73""}",[],2016-07-31 08:25:10,"{[{""http://terminology.hl7.org/CodeSystem/condition-clinical"",[],""resolved""}],[]}",[],"{[{""http://terminology.hl7.org/CodeSystem/condition-ver-status"",[],""confirmed""}],[]}","{""Otitis media"",[{""http://snomed.info/sct"",""Otitis media"",[],""65363002""}],[]}",2017-02-24 08:25:10,"[""http://snomed.info/sct|65363002""]"
"""Condition""","""4d02a6d9-1879-…",[],[],"{[],""urn:uuid:15ff0f7a-48a3-40f3-86fa-cf89a31c69fd""}",[],[],2015-01-24 01:16:55,[],[],[],"{[],""urn:uuid:f0400321-42e1-4536-ae3c-aa948fda3167""}",[],2015-01-24 01:16:55,"{[{""http://terminology.hl7.org/CodeSystem/condition-clinical"",[],""resolved""}],[]}",[],"{[{""http://terminology.hl7.org/CodeSystem/condition-ver-status"",[],""confirmed""}],[]}","{""Otitis media"",[{""http://snomed.info/sct"",""Otitis media"",[],""65363002""}],[]}",2015-08-29 01:16:55,"[""http://snomed.info/sct|65363002""]"
