https://blog.algorexhealth.com/2018/08/calculating-growth-and-obesity-from-fhir-messages/

In [1]:
from pathlib import Path
import json
import fhirclient.models.bundle as b
import fhirclient.models.patient as p
from fhirclient.models.fhirabstractbase import FHIRValidationError
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
def load_bundle(path):
    """Read a FHIR Bundle from a JSON file.

    Parameters
    ----------
    path : str or os.PathLike
        Location of the JSON file to read.

    Returns
    -------
    Bundle or None
        The object built by ``b.Bundle`` from the parsed JSON, or ``None``
        when construction raises ``FHIRValidationError`` (a message naming
        the skipped file is printed in that case).

    Raises
    ------
    Exception
        Any other error (missing file, malformed JSON, ...) is reported on
        stdout and then re-raised unchanged.
    """
    try:
        # JSON is UTF-8 by specification; do not depend on the platform locale.
        with open(path, encoding='utf-8') as file:
            json_data = json.load(file)
        bundle = b.Bundle(json_data)
    except FHIRValidationError:
        print(f'Skipping invalid FHIR data in {path}')
        bundle = None
    except Exception as exc:
        # Report the exception class directly instead of going through
        # sys.exc_info(): `sys` is not imported, so the old handler itself
        # failed with a NameError and hid the real error.
        print(f'Unexpected error: {type(exc)}')
        raise
    return bundle

In [None]:
json_path = Path("./data/1k-patients/fhir")
# Key every bundle by its file stem. The paths are sorted because glob order
# is filesystem-dependent; a fixed order keeps everything derived from
# `bundles` reproducible. `bundle_file` avoids reusing `p`, the alias of the
# imported patient module.
bundles = {bundle_file.stem: load_bundle(bundle_file)
           for bundle_file in sorted(json_path.glob('*.json'))}
# remove missing bundles (load_bundle returns None for invalid FHIR data)
bundles = {key: value for key, value in bundles.items() if value is not None}
print(f'Loaded {len(bundles)} FHIR Bundles')

Skipping invalid FHIR data in data/1k-patients/fhir/Leslie90_Willms744_47a6be70-4bbc-4ab4-8d63-d9c19b05b903.json
Skipping invalid FHIR data in data/1k-patients/fhir/Lola232_Nieves278_73d98dd4-a8a2-465e-9fd2-cb7d6ce3b466.json
Skipping invalid FHIR data in data/1k-patients/fhir/Josiah310_Predovic534_22f8f53c-617b-4e54-82a7-26abf5e22938.json


In [None]:
# Number of bundles kept after dropping the ones that failed to load
len(bundles)

In [None]:
def bundleToResourceTypeDict(bundle):
    """Count the resources in a bundle, grouped by resource type.

    Parameters
    ----------
    bundle
        Object exposing ``resource_type`` and ``entry``; every entry exposes
        ``resource``, which in turn exposes ``resource_type``.

    Returns
    -------
    dict
        Maps each resource type name to the number of entries of that type,
        with keys in sorted order (as produced by ``np.unique``). Empty when
        ``bundle`` is not a Bundle (a message is printed) or has no entries.
    """
    if bundle.resource_type != "Bundle":
        print(f'Expected bundle, got {bundle.resource_type}')
        return {}

    # `entry` is None rather than [] when the bundle carries no entries, and
    # an entry is allowed to have no resource; neither case should crash.
    entries = bundle.entry or []
    resource_types = np.array([
        e.resource.resource_type for e in entries if e.resource is not None
    ])
    unique, counts = np.unique(resource_types, return_counts=True)
    return dict(zip(unique, counts))

In [None]:
def bundlesToResourceTypeDf(bundles, index):
    """Tabulate resource-type counts for a collection of bundles.

    Each bundle is summarised with ``bundleToResourceTypeDict`` and becomes
    one row, labelled by the matching entry of ``index``; every resource type
    seen in any bundle becomes a column. Types absent from a bundle are
    counted as 0 and all values are cast to ``int``.
    """
    rows = []
    for bundle in bundles:
        rows.append(bundleToResourceTypeDict(bundle))
    counts = pd.DataFrame(rows, index=index)
    # A missing cell means "no resource of that type in this bundle".
    return counts.fillna(0).astype(int)

In [None]:
# One row per loaded bundle, labelled with the bundle's dict key
df = bundlesToResourceTypeDf(bundles.values(), bundles.keys())

In [None]:
# Preview the first rows of the resource-type count table
df.head()

In [None]:
# Summary statistics of the per-bundle Patient count
df["Patient"].describe()

Most of the bundles contain a single patient, but some bundles don't contain a patient at all. Let's move the bundles with zero patients to a separate data frame.

In [None]:
# Bundles without any Patient resource get their own frame ...
df2 = df.loc[df["Patient"].eq(0)]
# ... and are removed from the frame used in the rest of the analysis
df = df.loc[df["Patient"].ne(0)]

In [None]:
# Preview the rows whose Patient count is zero
df2.head()

The two bundles contain only organizations and practitioners.

In [None]:
# Column totals of df, largest first
resources_sum = df.sum().sort_values(ascending=False)
resources_sum

In [None]:
# Pairwise correlation between the columns of df
corr = df.corr()

# In order to remove duplicate information,
# use mask to show only other half of the heatmap.
# A boolean mask states the intent directly (True = hide the cell); the upper
# triangle, diagonal included, is hidden.
mask = np.triu(np.ones_like(corr, dtype=bool))
colormap = "YlGnBu"

# set figure size and keep the Axes so the heatmap is drawn on it explicitly
fig, ax = plt.subplots(figsize=(10, 10))

sns.heatmap(corr, mask=mask, square=True, cmap=colormap, ax=ax)
ax.set_title("Correlation of resource type counts across bundles");

* Explain: Encounter, Claim, ExplanationOfBenefit
* Explain: Observation, DiagnosticReport, ImagingStudy

Get the bundle with the highest number of encounters

In [None]:
# Order the rows by their Encounter count, largest first
sorted_df = df.sort_values(by="Encounter", ascending=False)

In [None]:
# Index label of the first row, i.e. the row with the highest Encounter count
sorted_df.index[0]