# Synthea Data Mining

The Synthea Health tool was used to generate a total of 11,638 electronic health care records:

```bash
sh synthea_setup.sh
```

Health records in [FHIR](https://en.wikipedia.org/wiki/Fast_Healthcare_Interoperability_Resources) (Fast Healthcare Interoperability Resources) format are provided as JSON files in the `output/fhir` folder.

# Setup

## Libraries

In [1]:
# Install the notebook's dependencies into the current environment.
# NOTE: the fhir.resources install is left commented out even though the
# import cell below requires that package.
#!pip install fhir.resources
!pip install pandas
!pip install numpy



In [2]:
import pandas as pd 
import numpy as np
import datetime

from fhir.resources.R4B.bundle import Bundle
from fhir.resources.R4B.patient import Patient
from fhir.resources.R4B.condition import Condition
from fhir.resources.R4B.observation import Observation
from fhir.resources.R4B.medicationrequest import MedicationRequest
from fhir.resources.R4B.procedure import Procedure
from fhir.resources.R4B.encounter import Encounter
from fhir.resources.R4B.claim import Claim
from fhir.resources.R4B.immunization import Immunization
from fhir.resources.R4B.humanname import HumanName

## Functions

### FHIR Import

In [3]:
def read_fihr_json(file_path):
    """Load a FHIR Bundle JSON file and return the resources it contains.

    Args:
        file_path: Location of the bundle JSON file; handed unchanged to
            ``Bundle.parse_file``.

    Returns:
        A list with ``entry.resource`` for every bundle entry, in bundle
        order. Empty when the bundle carries no entries.
    """
    pt_bundle = Bundle.parse_file(file_path)
    # ``entry`` is an optional field and may be None rather than an empty list.
    return [entry.resource for entry in pt_bundle.entry or []]

# Parse one sample patient bundle and show the distinct resource classes in it.
example = 'output/fhir/Aaron697_Eichmann909_8f8b9664-6af2-a8f6-694e-1eed399ea223.json'
ex_resources = read_fihr_json(example)
{type(resource) for resource in ex_resources}

{fhir.resources.R4B.allergyintolerance.AllergyIntolerance,
 fhir.resources.R4B.careplan.CarePlan,
 fhir.resources.R4B.careteam.CareTeam,
 fhir.resources.R4B.claim.Claim,
 fhir.resources.R4B.condition.Condition,
 fhir.resources.R4B.diagnosticreport.DiagnosticReport,
 fhir.resources.R4B.documentreference.DocumentReference,
 fhir.resources.R4B.encounter.Encounter,
 fhir.resources.R4B.explanationofbenefit.ExplanationOfBenefit,
 fhir.resources.R4B.immunization.Immunization,
 fhir.resources.R4B.medicationrequest.MedicationRequest,
 fhir.resources.R4B.observation.Observation,
 fhir.resources.R4B.patient.Patient,
 fhir.resources.R4B.procedure.Procedure,
 fhir.resources.R4B.provenance.Provenance}

### Patient Information

In [4]:
def get_patient_address_info(patient_obj):
    """Extract country, state, city and coordinates from a Patient resource.

    Every address on the patient contributes: text fields are concatenated
    and coordinates are summed, so the result is only meaningful when the
    patient has a single address (NOTE(review): confirm that holds for the
    data set). Missing addresses, text fields and extensions are skipped.

    Args:
        patient_obj: Patient resource exposing ``address``.

    Returns:
        Tuple ``(country, state, city, latitude, longitude)``. Text fields
        default to ``''`` and coordinates to ``0``.
    """
    pt_country, pt_state, pt_city, pt_lat, pt_lon = '', '', '', 0, 0
    for entry in patient_obj.address or []:
        pt_country += entry.country or ''
        pt_state += entry.state or ''
        pt_city += entry.city or ''
        # Coordinates are nested sub-extensions identified by their url.
        for ext in entry.extension or []:
            for ext2 in ext.extension or []:
                if ext2.url == 'latitude':
                    pt_lat += float(ext2.valueDecimal)
                elif ext2.url == 'longitude':
                    pt_lon += float(ext2.valueDecimal)
    return pt_country, pt_state, pt_city, pt_lat, pt_lon

def get_patient_demographic(patient_obj):
    """Collect the basic demographic fields of a Patient resource.

    Only the first entry of ``patient_obj.name`` is used. The first given
    name is the first name; any further given names are joined with a space
    into the middle name. When there is no middle name the string ``'None'``
    is returned (kept for compatibility with existing output).

    Args:
        patient_obj: Patient resource.

    Returns:
        Tuple ``(id, first_name, middle_name, family_name, gender,
        birth_date, marital_status, general_practitioner)``.
        ``marital_status`` is ``None`` when the patient has no
        ``maritalStatus``; ``first_name`` is ``None`` when no given name
        is recorded.
    """
    marital = patient_obj.maritalStatus
    pt_marital_status = marital.text if marital is not None else None

    primary_name = patient_obj.name[0]
    given_names = primary_name.given or []
    pt_first_name = given_names[0] if given_names else None
    if len(given_names) > 1:
        pt_middle_name = ' '.join(given_names[1:])
    else:
        pt_middle_name = 'None'

    return (
        patient_obj.id,
        pt_first_name,
        pt_middle_name,
        primary_name.family,
        patient_obj.gender,
        patient_obj.birthDate,
        pt_marital_status,
        patient_obj.generalPractitioner,
    )

def get_patient_age(patient_obj):
    """Return the patient's age in completed years as of today's date.

    The age is derived by comparing calendar dates instead of dividing a day
    count by an average year length, which is off by one on or just after a
    birthday.

    Args:
        patient_obj: Patient resource whose ``birthDate`` is a
            ``datetime.date``.

    Returns:
        Whole number of years (int) between the birth date and today.
    """
    pt_bd = patient_obj.birthDate
    today = datetime.date.today()
    # One year less while this year's birthday is still ahead.
    birthday_reached = (today.month, today.day) >= (pt_bd.month, pt_bd.day)
    return today.year - pt_bd.year - (0 if birthday_reached else 1)

def create_patient_entry(resource_obj):
    """Build a flat record describing the patient found in a resource list.

    The first resource whose ``resource_type`` is ``'Patient'`` is used.

    Args:
        resource_obj: Sequence of FHIR resources from one patient bundle.

    Returns:
        Dict with identity, demographic, age and address fields.

    Raises:
        IndexError: ``resource_obj`` contains no Patient resource.
    """
    # Search the argument itself, not the notebook-level sample bundle.
    patient_obj = next(
        (entry for entry in resource_obj if entry.resource_type == 'Patient'),
        None,
    )
    if patient_obj is None:
        raise IndexError('no Patient resource found in resource_obj')

    (pt_id, pt_first_name, pt_middle_name, pt_family_name, pt_gender,
     pt_birth_date, pt_marital_status, pt_gp) = get_patient_demographic(patient_obj)
    pt_age = get_patient_age(patient_obj)
    pt_country, pt_state, pt_city, pt_lat, pt_lon = get_patient_address_info(patient_obj)
    pt_entry = {
        'uuid': pt_id,
        'first_name': pt_first_name,
        'middle_name': pt_middle_name,
        'family_name': pt_family_name,
        'gender': pt_gender,
        'birth_date': str(pt_birth_date),
        'marital_status': pt_marital_status,
        'general_practioner': pt_gp,
        'age_years': pt_age,
        'country': pt_country,
        'state': pt_state,
        'city': pt_city,
        'location_lat': pt_lat,
        'location_long': pt_lon,
    }
    return pt_entry

In [5]:
# Build the flat patient record for the sample bundle.
create_patient_entry(ex_resources)

{'uuid': '8f8b9664-6af2-a8f6-694e-1eed399ea223',
 'first_name': 'Aaron697',
 'middle_name': 'Don899',
 'family_name': 'Eichmann909',
 'gender': 'male',
 'birth_date': '1956-04-06',
 'marital_status': 'Never Married',
 'general_practioner': None,
 'age_years': 67,
 'country': 'US',
 'state': 'MA',
 'city': 'East Longmeadow',
 'location_lat': 42.05782803279011,
 'location_long': -72.45628312557714}

### Condition

In [6]:
# Locate every Condition resource in the sample bundle and display the first one.
condition_ind = [ind for ind, entry in enumerate(ex_resources) if entry.resource_type == 'Condition']
test = ex_resources[condition_ind[0]]
test

Condition(resource_type='Condition', fhir_comments=None, id='ad176158-b405-bc88-1830-af7cf696ba3b', implicitRules=None, implicitRules__ext=None, language=None, language__ext=None, meta=Meta(resource_type='Meta', fhir_comments=None, extension=None, id=None, lastUpdated=None, lastUpdated__ext=None, profile=['http://hl7.org/fhir/us/core/StructureDefinition/us-core-condition-encounter-diagnosis'], profile__ext=None, security=None, source=None, source__ext=None, tag=None, versionId=None, versionId__ext=None), contained=None, extension=None, modifierExtension=None, text=None, abatementAge=None, abatementDateTime=None, abatementDateTime__ext=None, abatementPeriod=None, abatementRange=None, abatementString=None, abatementString__ext=None, asserter=None, bodySite=None, category=[CodeableConcept(resource_type='CodeableConcept', fhir_comments=None, extension=None, id=None, coding=[Coding(resource_type='Coding', fhir_comments=None, extension=None, id=None, code='encounter-diagnosis', code__ext=Non

In [7]:
# Print the fields of interest from the Condition held in `test`; the patient
# reference is shown with its 'urn:uuid:' prefix stripped.
print(test.resource_type)
print(test.id)
print(test.category[0].coding[0].display)
print(test.code.coding[0].code)
print(test.code.coding[0].system)
print(test.code.coding[0].display)
print(test.onsetDateTime)
print(test.recordedDate)
print(test.subject.reference.replace('urn:uuid:', ''))

Condition
ad176158-b405-bc88-1830-af7cf696ba3b
Encounter Diagnosis
224299000
http://snomed.info/sct
Received higher education (finding)
1974-05-31 11:54:53-04:00
1974-05-31 11:54:53-04:00
8f8b9664-6af2-a8f6-694e-1eed399ea223


In [8]:
# Display the eleventh Condition (index 10) of the sample bundle.
test2 = ex_resources[condition_ind[10]]
test2

Condition(resource_type='Condition', fhir_comments=None, id='7cc82150-2f62-3017-cf8f-a8f570d001ef', implicitRules=None, implicitRules__ext=None, language=None, language__ext=None, meta=Meta(resource_type='Meta', fhir_comments=None, extension=None, id=None, lastUpdated=None, lastUpdated__ext=None, profile=['http://hl7.org/fhir/us/core/StructureDefinition/us-core-condition-encounter-diagnosis'], profile__ext=None, security=None, source=None, source__ext=None, tag=None, versionId=None, versionId__ext=None), contained=None, extension=None, modifierExtension=None, text=None, abatementAge=None, abatementDateTime=datetime.datetime(2007, 4, 18, 13, 1, 17, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), abatementDateTime__ext=None, abatementPeriod=None, abatementRange=None, abatementString=None, abatementString__ext=None, asserter=None, bodySite=None, category=[CodeableConcept(resource_type='CodeableConcept', fhir_comments=None, extension=None, id=None, coding=[Coding(res

In [9]:
# Print the same fields of interest for the Condition held in `test2`
# (onsetDateTime is not printed here).
print(test2.resource_type)
print(test2.id)
print(test2.category[0].coding[0].display)
print(test2.code.coding[0].code)
print(test2.code.coding[0].system)
print(test2.code.coding[0].display)
print(test2.recordedDate)
print(test2.subject.reference.replace('urn:uuid:', ''))

Condition
7cc82150-2f62-3017-cf8f-a8f570d001ef
Encounter Diagnosis
183996000
http://snomed.info/sct
Sterilization requested (situation)
2007-03-29 11:22:17-04:00
8f8b9664-6af2-a8f6-694e-1eed399ea223


In [10]:
def get_condition_entry(condition_obj):
    """Flatten one Condition resource into a dict of scalar fields.

    Patient and encounter references lose their ``'urn:uuid:'`` prefix, the
    category and code come from the first coding of each, and the recorded
    date is reduced to its date part as a string.

    Args:
        condition_obj: Condition resource.

    Returns:
        Dict with keys ``uuid``, ``entry_id``, ``category``, ``code``,
        ``code_system``, ``code_text`` and ``record_date``.
    """
    patient_ref = condition_obj.subject.reference
    encounter_ref = condition_obj.encounter.reference
    category_label = condition_obj.category[0].coding[0].display
    primary_coding = condition_obj.code.coding[0]
    return {
        'uuid': patient_ref.replace('urn:uuid:', ''),
        'entry_id': encounter_ref.replace('urn:uuid:', ''),
        'category': category_label,
        'code': primary_coding.code,
        'code_system': primary_coding.system,
        'code_text': primary_coding.display,
        'record_date': str(condition_obj.recordedDate.date()),
    }

def create_condition_entries(resource_obj):
    """Flatten every Condition in a resource list into column lists.

    Args:
        resource_obj: Sequence of FHIR resources from one patient bundle.

    Returns:
        Dict mapping each column name to a list with one value per
        Condition, in the order the Conditions appear in ``resource_obj``.
        All lists are empty when there is no Condition.
    """
    condition_entries = {
        'uuid': [],
        'entry_id': [],
        'category': [],
        'code': [],
        'code_system': [],
        'code_text': [],
        'record_date': [],
    }
    # Read from the argument itself, not the notebook-level sample bundle.
    for entry in resource_obj:
        if entry.resource_type != 'Condition':
            continue
        for key, item in get_condition_entry(entry).items():
            condition_entries[key].append(item)
    return condition_entries

In [11]:
# Flatten every Condition of the sample bundle into column lists.
create_condition_entries(ex_resources)

{'uuid': ['8f8b9664-6af2-a8f6-694e-1eed399ea223',
  '8f8b9664-6af2-a8f6-694e-1eed399ea223',
  '8f8b9664-6af2-a8f6-694e-1eed399ea223',
  '8f8b9664-6af2-a8f6-694e-1eed399ea223',
  '8f8b9664-6af2-a8f6-694e-1eed399ea223',
  '8f8b9664-6af2-a8f6-694e-1eed399ea223',
  '8f8b9664-6af2-a8f6-694e-1eed399ea223',
  '8f8b9664-6af2-a8f6-694e-1eed399ea223',
  '8f8b9664-6af2-a8f6-694e-1eed399ea223',
  '8f8b9664-6af2-a8f6-694e-1eed399ea223',
  '8f8b9664-6af2-a8f6-694e-1eed399ea223',
  '8f8b9664-6af2-a8f6-694e-1eed399ea223',
  '8f8b9664-6af2-a8f6-694e-1eed399ea223',
  '8f8b9664-6af2-a8f6-694e-1eed399ea223',
  '8f8b9664-6af2-a8f6-694e-1eed399ea223',
  '8f8b9664-6af2-a8f6-694e-1eed399ea223'],
 'entry_id': ['a12ccdf4-16fb-bc58-140d-a55d7c449144',
  '586a8ff8-44de-508d-5bc5-867327f7e277',
  '8ec11d31-f320-fd79-9af7-ded46308e0d5',
  '2475fed8-47a5-cc05-ed15-1d73069d45af',
  '2475fed8-47a5-cc05-ed15-1d73069d45af',
  '8b5c1166-0544-5fde-1f8f-d4cb9b55dbca',
  '8b5c1166-0544-5fde-1f8f-d4cb9b55dbca',
  'af068d5e-d

In [12]:
# Inspect the raw (still 'urn:uuid:'-prefixed) encounter reference of the
# fifth Condition (index 4).
condition_ind = [ind for ind, entry in enumerate(ex_resources) if entry.resource_type == 'Condition']
ex_resources[condition_ind[4]].encounter.reference

'urn:uuid:2475fed8-47a5-cc05-ed15-1d73069d45af'

In [13]:
# Render the flattened conditions as a DataFrame, one row per Condition.
pd.DataFrame(create_condition_entries(ex_resources))

Unnamed: 0,uuid,entry_id,category,code,code_system,code_text,record_date
0,8f8b9664-6af2-a8f6-694e-1eed399ea223,a12ccdf4-16fb-bc58-140d-a55d7c449144,Encounter Diagnosis,224299000,http://snomed.info/sct,Received higher education (finding),1974-05-31
1,8f8b9664-6af2-a8f6-694e-1eed399ea223,586a8ff8-44de-508d-5bc5-867327f7e277,Encounter Diagnosis,423315002,http://snomed.info/sct,Limited social contact (finding),1984-06-15
2,8f8b9664-6af2-a8f6-694e-1eed399ea223,8ec11d31-f320-fd79-9af7-ded46308e0d5,Encounter Diagnosis,706893006,http://snomed.info/sct,Victim of intimate partner abuse (finding),1990-06-22
3,8f8b9664-6af2-a8f6-694e-1eed399ea223,2475fed8-47a5-cc05-ed15-1d73069d45af,Encounter Diagnosis,160903007,http://snomed.info/sct,Full-time employment (finding),1998-04-17
4,8f8b9664-6af2-a8f6-694e-1eed399ea223,2475fed8-47a5-cc05-ed15-1d73069d45af,Encounter Diagnosis,422650009,http://snomed.info/sct,Social isolation (finding),1998-04-17
5,8f8b9664-6af2-a8f6-694e-1eed399ea223,8b5c1166-0544-5fde-1f8f-d4cb9b55dbca,Encounter Diagnosis,314529007,http://snomed.info/sct,Medication review due (situation),2000-04-21
6,8f8b9664-6af2-a8f6-694e-1eed399ea223,8b5c1166-0544-5fde-1f8f-d4cb9b55dbca,Encounter Diagnosis,73595000,http://snomed.info/sct,Stress (finding),2000-04-21
7,8f8b9664-6af2-a8f6-694e-1eed399ea223,af068d5e-dff9-ec28-dbf7-ccd2acf40bd2,Encounter Diagnosis,195662009,http://snomed.info/sct,Acute viral pharyngitis (disorder),2003-07-07
8,8f8b9664-6af2-a8f6-694e-1eed399ea223,3c59053b-c104-f70c-13c5-2fbaf4083b96,Encounter Diagnosis,314529007,http://snomed.info/sct,Medication review due (situation),2004-04-30
9,8f8b9664-6af2-a8f6-694e-1eed399ea223,3c59053b-c104-f70c-13c5-2fbaf4083b96,Encounter Diagnosis,162864005,http://snomed.info/sct,Body mass index 30+ - obesity (finding),2004-04-30


### Observation

In [14]:
def _observation_value_and_unit(value_holder):
    """Return the ``(value, unit)`` pair of an Observation or a component.

    A quantity wins when present; otherwise a codeable concept yields its
    text as the value and its first coding's code in the unit slot; with
    neither, both are ``None``.
    """
    quantity = value_holder.valueQuantity
    if quantity is not None:
        return quantity.value, quantity.unit
    concept = value_holder.valueCodeableConcept
    if concept is not None:
        return concept.text, concept.coding[0].code
    return None, None


def get_observation_entry(observation_obj):
    """Flatten one Observation resource into column lists.

    An Observation with components produces one row per component (using the
    component's own code and value); otherwise it produces a single row from
    the Observation's own code and value. Patient, encounter, category and
    date always come from the Observation itself.

    Args:
        observation_obj: Observation resource.

    Returns:
        Dict mapping ``uuid``, ``entry_id``, ``category``, ``code``,
        ``code_system``, ``code_text``, ``code_value``, ``code_unit`` and
        ``record_date`` to equally long lists, one element per row.
    """
    pt_id = observation_obj.subject.reference.replace('urn:uuid:', '')
    obs_entry_id = observation_obj.encounter.reference.replace('urn:uuid:', '')
    obs_category = observation_obj.category[0].coding[0].display
    obs_date = str(observation_obj.effectiveDateTime.date())

    components = observation_obj.component
    # Without components the observation itself carries the code and value.
    value_holders = components if components is not None else [observation_obj]

    observation_entry = {
        'uuid': [],
        'entry_id': [],
        'category': [],
        'code': [],
        'code_system': [],
        'code_text': [],
        'code_value': [],
        'code_unit': [],
        'record_date': [],
    }
    for holder in value_holders:
        coding = holder.code.coding[0]
        value, unit = _observation_value_and_unit(holder)
        observation_entry['uuid'].append(pt_id)
        observation_entry['entry_id'].append(obs_entry_id)
        observation_entry['category'].append(obs_category)
        observation_entry['code'].append(coding.code)
        observation_entry['code_system'].append(coding.system)
        observation_entry['code_text'].append(coding.display)
        observation_entry['code_value'].append(value)
        observation_entry['code_unit'].append(unit)
        observation_entry['record_date'].append(obs_date)
    return observation_entry

def create_observation_entries(resource_obj):
    """Flatten every Observation in a resource list into column lists.

    Args:
        resource_obj: Sequence of FHIR resources from one patient bundle.

    Returns:
        Dict mapping each column name to a list of values. An Observation
        contributes as many rows as ``get_observation_entry`` returns for it.
        All lists are empty when there is no Observation.
    """
    observation_entries = {
        'uuid': [],
        'entry_id': [],
        'category': [],
        'code': [],
        'code_system': [],
        'code_text': [],
        'code_value': [],
        'code_unit': [],
        'record_date': [],
    }
    # Read from the argument itself, not the notebook-level sample bundle.
    for entry in resource_obj:
        if entry.resource_type != 'Observation':
            continue
        # Each flattened observation maps a column to a list of row values.
        for key, items in get_observation_entry(entry).items():
            observation_entries[key].extend(items)
    return observation_entries

In [15]:
# Flatten the sample bundle's observations into a DataFrame and display it.
# NOTE: this rebinds the notebook-level name `test`.
test = pd.DataFrame(create_observation_entries(ex_resources))
test

Unnamed: 0,uuid,entry_id,category,code,code_system,code_text,code_value,code_unit,record_date
0,8f8b9664-6af2-a8f6-694e-1eed399ea223,9679dc6a-ba35-9314-7ed5-ad3c877ca79c,Vital signs,8302-2,http://loinc.org,Body Height,171.6,cm,2002-04-26
1,8f8b9664-6af2-a8f6-694e-1eed399ea223,9679dc6a-ba35-9314-7ed5-ad3c877ca79c,Vital signs,72514-3,http://loinc.org,Pain severity - 0-10 verbal numeric rating [Sc...,2,{score},2002-04-26
2,8f8b9664-6af2-a8f6-694e-1eed399ea223,9679dc6a-ba35-9314-7ed5-ad3c877ca79c,Vital signs,29463-7,http://loinc.org,Body Weight,85.8,kg,2002-04-26
3,8f8b9664-6af2-a8f6-694e-1eed399ea223,9679dc6a-ba35-9314-7ed5-ad3c877ca79c,Vital signs,39156-5,http://loinc.org,Body mass index (BMI) [Ratio],29.13,kg/m2,2002-04-26
4,8f8b9664-6af2-a8f6-694e-1eed399ea223,9679dc6a-ba35-9314-7ed5-ad3c877ca79c,Vital signs,8462-4,http://loinc.org,Diastolic Blood Pressure,71,mm[Hg],2002-04-26
...,...,...,...,...,...,...,...,...,...
258,8f8b9664-6af2-a8f6-694e-1eed399ea223,226a5e8e-2eeb-b12e-81db-10ad75b570af,Survey,56051-6,http://loinc.org,Do you consider yourself Hispanic/Latino?,No,LA32-8,2010-04-30
259,8f8b9664-6af2-a8f6-694e-1eed399ea223,226a5e8e-2eeb-b12e-81db-10ad75b570af,Survey,70274-6,http://loinc.org,Generalized anxiety disorder 7 item (GAD-7) to...,2,{score},2010-04-30
260,8f8b9664-6af2-a8f6-694e-1eed399ea223,226a5e8e-2eeb-b12e-81db-10ad75b570af,Survey,55758-7,http://loinc.org,Patient Health Questionnaire 2 item (PHQ-2) to...,1,{score},2010-04-30
261,8f8b9664-6af2-a8f6-694e-1eed399ea223,226a5e8e-2eeb-b12e-81db-10ad75b570af,Survey,82667-7,http://loinc.org,Total score [DAST-10],1,{score},2010-04-30


In [16]:
# Count observation rows per category.
test['category'].value_counts()

category
Survey            164
Vital signs        57
Laboratory         34
Social history      7
Exam                1
Name: count, dtype: int64

In [17]:
# Split the observation table into one DataFrame per category.
vital_obs_df = test[test['category'] == 'Vital signs']
survey_obs_df = test[test['category'] == 'Survey']
lab_obs_df = test[test['category'] == 'Laboratory']
social_obs_df = test[test['category'] == 'Social history']
exam_obs_df = test[test['category'] == 'Exam']

In [18]:
# Preview the first two 'Vital signs' rows.
vital_obs_df.head(2)

Unnamed: 0,uuid,entry_id,category,code,code_system,code_text,code_value,code_unit,record_date
0,8f8b9664-6af2-a8f6-694e-1eed399ea223,9679dc6a-ba35-9314-7ed5-ad3c877ca79c,Vital signs,8302-2,http://loinc.org,Body Height,171.6,cm,2002-04-26
1,8f8b9664-6af2-a8f6-694e-1eed399ea223,9679dc6a-ba35-9314-7ed5-ad3c877ca79c,Vital signs,72514-3,http://loinc.org,Pain severity - 0-10 verbal numeric rating [Sc...,2.0,{score},2002-04-26


In [19]:
# Preview the first two 'Survey' rows.
survey_obs_df.head(2)

Unnamed: 0,uuid,entry_id,category,code,code_system,code_text,code_value,code_unit,record_date
24,8f8b9664-6af2-a8f6-694e-1eed399ea223,9679dc6a-ba35-9314-7ed5-ad3c877ca79c,Survey,76501-6,http://loinc.org,"Within the last year, have you been afraid of ...",Yes,LA33-6,2002-04-26
25,8f8b9664-6af2-a8f6-694e-1eed399ea223,9679dc6a-ba35-9314-7ed5-ad3c877ca79c,Survey,93026-3,http://loinc.org,Do you feel physically and emotionally safe wh...,Yes,LA33-6,2002-04-26


In [20]:
# Preview the first two 'Laboratory' rows.
lab_obs_df.head(2)

Unnamed: 0,uuid,entry_id,category,code,code_system,code_text,code_value,code_unit,record_date
8,8f8b9664-6af2-a8f6-694e-1eed399ea223,9679dc6a-ba35-9314-7ed5-ad3c877ca79c,Laboratory,2093-3,http://loinc.org,Cholesterol [Mass/volume] in Serum or Plasma,286.57,mg/dL,2002-04-26
9,8f8b9664-6af2-a8f6-694e-1eed399ea223,9679dc6a-ba35-9314-7ed5-ad3c877ca79c,Laboratory,2571-8,http://loinc.org,Triglycerides,122.91,mg/dL,2002-04-26


In [21]:
# Preview the first two 'Social history' rows.
social_obs_df.head(2)

Unnamed: 0,uuid,entry_id,category,code,code_system,code_text,code_value,code_unit,record_date
23,8f8b9664-6af2-a8f6-694e-1eed399ea223,9679dc6a-ba35-9314-7ed5-ad3c877ca79c,Social history,72166-2,http://loinc.org,Tobacco smoking status,Never smoked tobacco (finding),266919005,2002-04-26
57,8f8b9664-6af2-a8f6-694e-1eed399ea223,3c59053b-c104-f70c-13c5-2fbaf4083b96,Social history,72166-2,http://loinc.org,Tobacco smoking status,Never smoked tobacco (finding),266919005,2004-04-30


In [22]:
# Preview up to two 'Exam' rows.
exam_obs_df.head(2)

Unnamed: 0,uuid,entry_id,category,code,code_system,code_text,code_value,code_unit,record_date
262,8f8b9664-6af2-a8f6-694e-1eed399ea223,145ddbe1-cd4d-4fb5-8851-f95eccbc8639,Exam,69453-9,http://loinc.org,Cause of Death [US Standard Certificate of Death],Sudden Cardiac Death,95281009,2010-06-04


### Medication Request

In [23]:
# Display the first MedicationRequest of the sample bundle.
# NOTE: despite its name, `condition_ind` holds MedicationRequest indices
# from here on (the notebook-level name is reused).
condition_ind = [ind for ind, entry in enumerate(ex_resources) if entry.resource_type == 'MedicationRequest']
test = ex_resources[condition_ind[0]]
test

MedicationRequest(resource_type='MedicationRequest', fhir_comments=None, id='1f404524-05c5-7cac-612d-2e9627654c17', implicitRules=None, implicitRules__ext=None, language=None, language__ext=None, meta=Meta(resource_type='Meta', fhir_comments=None, extension=None, id=None, lastUpdated=None, lastUpdated__ext=None, profile=['http://hl7.org/fhir/us/core/StructureDefinition/us-core-medicationrequest'], profile__ext=None, security=None, source=None, source__ext=None, tag=None, versionId=None, versionId__ext=None), contained=None, extension=None, modifierExtension=None, text=None, authoredOn=datetime.datetime(1959, 5, 3, 11, 22, 17, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), authoredOn__ext=None, basedOn=None, category=[CodeableConcept(resource_type='CodeableConcept', fhir_comments=None, extension=None, id=None, coding=[Coding(resource_type='Coding', fhir_comments=None, extension=None, id=None, code='community', code__ext=None, display='Community', display__ext=Non

In [24]:
# Print the fields of interest from the MedicationRequest held in `test`;
# references are shown with their 'urn:uuid:' prefix stripped.
print(test.resource_type)
print(test.encounter.reference.replace('urn:uuid:', ''))
print(test.category[0].coding[0].display)
print(test.dosageInstruction[0].text)
print(test.insurance)
print(test.intent)
print(test.medicationCodeableConcept.coding[0].display)
print(test.medicationCodeableConcept.coding[0].code)
print(test.medicationCodeableConcept.coding[0].system)
# Disabled for this record - presumably it has no reasonReference (its
# reason is empty in the flattened medication table); verify.
#print(test.reasonReference[0].display)
print(test.requester.display)
print(test.subject.reference.replace('urn:uuid:', ''))

MedicationRequest
6c6d8d72-867b-5ca5-eab8-a833510701f1
Community
Take as needed.
None
order
diphenhydrAMINE Hydrochloride 25 MG Oral Tablet
1049630
http://www.nlm.nih.gov/research/umls/rxnorm
Dr. Cletus494 Paucek755
8f8b9664-6af2-a8f6-694e-1eed399ea223


In [25]:
# Display the second MedicationRequest (index 1) of the sample bundle.
# NOTE: despite its name, `condition_ind` holds MedicationRequest indices here.
condition_ind = [ind for ind, entry in enumerate(ex_resources) if entry.resource_type == 'MedicationRequest']
test = ex_resources[condition_ind[1]]
test

MedicationRequest(resource_type='MedicationRequest', fhir_comments=None, id='96b982f8-8793-0454-4ccf-49406afda8a9', implicitRules=None, implicitRules__ext=None, language=None, language__ext=None, meta=Meta(resource_type='Meta', fhir_comments=None, extension=None, id=None, lastUpdated=None, lastUpdated__ext=None, profile=['http://hl7.org/fhir/us/core/StructureDefinition/us-core-medicationrequest'], profile__ext=None, security=None, source=None, source__ext=None, tag=None, versionId=None, versionId__ext=None), contained=None, extension=None, modifierExtension=None, text=None, authoredOn=datetime.datetime(2008, 12, 20, 12, 59, 46, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=68400))), authoredOn__ext=None, basedOn=None, category=[CodeableConcept(resource_type='CodeableConcept', fhir_comments=None, extension=None, id=None, coding=[Coding(resource_type='Coding', fhir_comments=None, extension=None, id=None, code='community', code__ext=None, display='Community', display__ext=N

In [26]:
# Print the fields of interest from the MedicationRequest held in `test`,
# this time including the reason and the authoring date.
print(test.resource_type)
print(test.encounter.reference.replace('urn:uuid:', ''))
print(test.category[0].coding[0].display)
# Disabled for this record - presumably it has no dosageInstruction (its
# dosage is empty in the flattened medication table); verify.
#print(test.dosageInstruction[0].text)
print(test.insurance)
print(test.intent)
print(test.medicationCodeableConcept.coding[0].display)
print(test.medicationCodeableConcept.coding[0].code)
print(test.medicationCodeableConcept.coding[0].system)
print(test.reasonReference[0].display)
print(test.requester.display)
print(str(test.authoredOn.date()))
print(test.subject.reference.replace('urn:uuid:', ''))

MedicationRequest
8157ac60-0e4d-2d0c-35d1-92051a956ec7
Community
None
order
clonazePAM 0.25 MG Oral Tablet
204892
http://www.nlm.nih.gov/research/umls/rxnorm
Seizure disorder
Dr. Dean966 Kris249
2008-12-20
8f8b9664-6af2-a8f6-694e-1eed399ea223


In [27]:
def get_medication_entry(medication_obj):
    """Flatten one MedicationRequest resource into single-element column lists.

    Patient and encounter references lose their ``'urn:uuid:'`` prefix. The
    dosage text and the reason display come from the first element of
    ``dosageInstruction`` / ``reasonReference`` and are ``None`` when the
    corresponding attribute is ``None``.

    Args:
        medication_obj: MedicationRequest resource.

    Returns:
        Dict mapping ``uuid``, ``entry_id``, ``category``, ``code``,
        ``code_system``, ``code_text``, ``insurance``, ``intent``,
        ``dosage``, ``reason``, ``requester`` and ``request_date`` to
        one-element lists.
    """
    patient_id = medication_obj.subject.reference.replace('urn:uuid:', '')
    encounter_id = medication_obj.encounter.reference.replace('urn:uuid:', '')
    category_label = medication_obj.category[0].coding[0].display
    drug_coding = medication_obj.medicationCodeableConcept.coding[0]

    dosage_list = medication_obj.dosageInstruction
    dosage_text = None if dosage_list is None else dosage_list[0].text

    reason_list = medication_obj.reasonReference
    reason_text = None if reason_list is None else reason_list[0].display

    return {
        'uuid': [patient_id],
        'entry_id': [encounter_id],
        'category': [category_label],
        'code': [drug_coding.code],
        'code_system': [drug_coding.system],
        'code_text': [drug_coding.display],
        'insurance': [medication_obj.insurance],
        'intent': [medication_obj.intent],
        'dosage': [dosage_text],
        'reason': [reason_text],
        'requester': [medication_obj.requester.display],
        'request_date': [str(medication_obj.authoredOn.date())],
    }

def create_medication_entries(resource_obj):
    """Flatten every MedicationRequest in a resource list into column lists.

    Args:
        resource_obj: Sequence of FHIR resources from one patient bundle.

    Returns:
        Dict mapping each column name to a list with one value per
        MedicationRequest, in the order they appear in ``resource_obj``.
        All lists are empty when there is no MedicationRequest.
    """
    medication_entries = {
        'uuid': [],
        'entry_id': [],
        'category': [],
        'code': [],
        'code_system': [],
        'code_text': [],
        'insurance': [],
        'intent': [],
        'dosage': [],
        'reason': [],
        'requester': [],
        'request_date': [],
    }
    # Read from the argument itself, not the notebook-level sample bundle.
    for entry in resource_obj:
        if entry.resource_type != 'MedicationRequest':
            continue
        # Each flattened request maps a column to a list of row values.
        for key, items in get_medication_entry(entry).items():
            medication_entries[key].extend(items)
    return medication_entries

In [28]:
# Render the flattened medication requests as a DataFrame.
pd.DataFrame(create_medication_entries(ex_resources))

Unnamed: 0,uuid,entry_id,category,code,code_system,code_text,insurance,intent,dosage,reason,requester,request_date
0,8f8b9664-6af2-a8f6-694e-1eed399ea223,6c6d8d72-867b-5ca5-eab8-a833510701f1,Community,1049630,http://www.nlm.nih.gov/research/umls/rxnorm,diphenhydrAMINE Hydrochloride 25 MG Oral Tablet,,order,Take as needed.,,Dr. Cletus494 Paucek755,1959-05-03
1,8f8b9664-6af2-a8f6-694e-1eed399ea223,8157ac60-0e4d-2d0c-35d1-92051a956ec7,Community,204892,http://www.nlm.nih.gov/research/umls/rxnorm,clonazePAM 0.25 MG Oral Tablet,,order,,Seizure disorder,Dr. Dean966 Kris249,2008-12-20


### Procedure

In [33]:
# List the distinct resource type names present in the sample bundle.
resource_types = [entry.resource_type for entry in ex_resources]
set(resource_types)

{'AllergyIntolerance',
 'CarePlan',
 'CareTeam',
 'Claim',
 'Condition',
 'DiagnosticReport',
 'DocumentReference',
 'Encounter',
 'ExplanationOfBenefit',
 'Immunization',
 'MedicationRequest',
 'Observation',
 'Patient',
 'Procedure',
 'Provenance'}

In [67]:
# Locate every Procedure resource in the sample bundle and display the one at
# position 66 of that list.
procedure_ind = [ind for ind, entry in enumerate(ex_resources) if entry.resource_type == 'Procedure']
test = ex_resources[procedure_ind[66]]
test

Procedure(resource_type='Procedure', fhir_comments=None, id='06d27ee3-f3a4-a01f-fad8-85f8d9518d0e', implicitRules=None, implicitRules__ext=None, language=None, language__ext=None, meta=Meta(resource_type='Meta', fhir_comments=None, extension=None, id=None, lastUpdated=None, lastUpdated__ext=None, profile=['http://hl7.org/fhir/us/core/StructureDefinition/us-core-procedure'], profile__ext=None, security=None, source=None, source__ext=None, tag=None, versionId=None, versionId__ext=None), contained=None, extension=None, modifierExtension=None, text=None, asserter=None, basedOn=None, bodySite=None, category=None, code=CodeableConcept(resource_type='CodeableConcept', fhir_comments=None, extension=None, id=None, coding=[Coding(resource_type='Coding', fhir_comments=None, extension=None, id=None, code='385763009', code__ext=None, display='Hospice care (regime/therapy)', display__ext=None, system='http://snomed.info/sct', system__ext=None, userSelected=None, userSelected__ext=None, version=None,

In [52]:
# Print the fields of interest from the Procedure held in `test`; references
# are shown with their 'urn:uuid:' prefix stripped.
print(test.resource_type)
print(test.encounter.reference.replace('urn:uuid:', ''))
print(test.code.coding[0].display)
print(test.code.coding[0].code)
print(test.code.coding[0].system)
print(test.location.display)
print(str(test.performedPeriod.start.date()))
print(str(test.performedPeriod.end.date()))
# total_seconds() counts whole days too; timedelta.seconds would drop them.
print(str(int((test.performedPeriod.end - test.performedPeriod.start).total_seconds())))
print(test.subject.reference.replace('urn:uuid:', ''))

Procedure
9679dc6a-ba35-9314-7ed5-ad3c877ca79c
Medication Reconciliation (procedure)
430193006
http://snomed.info/sct
CARING HEALTH CENTER, INC
2002-04-26
2002-04-26
900
8f8b9664-6af2-a8f6-694e-1eed399ea223


In [61]:
def get_procedure_entry(procedure_obj):
    """Flatten one Procedure resource into single-element column lists.

    Patient and encounter references lose their ``'urn:uuid:'`` prefix. The
    duration is the full elapsed time between ``performedPeriod.start`` and
    ``performedPeriod.end`` in whole seconds, rendered as a string.

    Args:
        procedure_obj: Procedure resource with a ``performedPeriod`` that has
            both ``start`` and ``end``.

    Returns:
        Dict mapping ``uuid``, ``entry_id``, ``code``, ``code_system``,
        ``code_text``, ``location``, ``start_date``, ``end_date`` and
        ``duration_seconds`` to one-element lists. ``location`` holds
        ``None`` when the procedure has no location.
    """
    coding = procedure_obj.code.coding[0]
    location = procedure_obj.location
    period = procedure_obj.performedPeriod
    # total_seconds() rather than .seconds: .seconds is only the sub-day
    # remainder and silently drops whole days of a multi-day procedure.
    duration = int((period.end - period.start).total_seconds())
    procedure_entry = {
        'uuid': [procedure_obj.subject.reference.replace('urn:uuid:', '')],
        'entry_id': [procedure_obj.encounter.reference.replace('urn:uuid:', '')],
        'code': [coding.code],
        'code_system': [coding.system],
        'code_text': [coding.display],
        'location': [location.display if location is not None else None],
        'start_date': [str(period.start.date())],
        'end_date': [str(period.end.date())],
        'duration_seconds': [str(duration)],
    }
    return procedure_entry

def create_procedure_entries(resource_obj):
    """Collect every Procedure in a resource list into column-wise lists.

    Args:
        resource_obj: Iterable of resources, each exposing ``resource_type``.

    Returns:
        dict: Column name mapped to a list with one value per Procedure
        found, in the order the procedures appear in ``resource_obj``.
        All lists are empty when there are no procedures.
    """
    procedure_entries = {
        'uuid': [],
        'entry_id': [],
        'code': [],
        'code_system': [],
        'code_text': [],
        'location': [],
        'start_date': [],
        'end_date': [],
        'duration_seconds': [],
    }
    # Iterate the argument itself: the previous version indexed the notebook
    # global `ex_resources`, so any other bundle produced rows taken from the
    # example patient (or an IndexError/NameError).
    for entry in resource_obj:
        if entry.resource_type != 'Procedure':
            continue
        for key, values in get_procedure_entry(entry).items():
            procedure_entries[key].extend(values)
    return procedure_entries

In [68]:
# Preview the first two procedure rows extracted from the example resources.
pd.DataFrame(create_procedure_entries(ex_resources)).head(2)

Unnamed: 0,uuid,entry_id,code,code_system,code_text,location,start_date,end_date,duration_seconds
0,8f8b9664-6af2-a8f6-694e-1eed399ea223,9679dc6a-ba35-9314-7ed5-ad3c877ca79c,430193006,http://snomed.info/sct,Medication Reconciliation (procedure),"CARING HEALTH CENTER, INC",2002-04-26,2002-04-26,900
1,8f8b9664-6af2-a8f6-694e-1eed399ea223,9679dc6a-ba35-9314-7ed5-ad3c877ca79c,710824005,http://snomed.info/sct,Assessment of health and social care needs (pr...,"CARING HEALTH CENTER, INC",2002-04-26,2002-04-26,3279


### Claims

In [72]:
# Positions of every Claim resource within the example resource list.
claim_ind = [
    position
    for position, resource in enumerate(ex_resources)
    if resource.resource_type == 'Claim'
]
# Keep the first Claim around as a sample object and display it.
test = ex_resources[claim_ind[0]]
test

Claim(resource_type='Claim', fhir_comments=None, id='3ae5c65a-77aa-876a-e0b6-11f9f64ca247', implicitRules=None, implicitRules__ext=None, language=None, language__ext=None, meta=None, contained=None, extension=None, modifierExtension=None, text=None, accident=None, billablePeriod=Period(resource_type='Period', fhir_comments=None, extension=None, id=None, end=datetime.datetime(1959, 5, 3, 11, 37, 17, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), end__ext=None, start=datetime.datetime(1959, 5, 3, 11, 22, 17, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), start__ext=None), careTeam=None, created=datetime.datetime(1959, 5, 3, 11, 37, 17, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=72000))), created__ext=None, diagnosis=None, enterer=None, facility=None, fundsReserve=None, identifier=None, insurance=[ClaimInsurance(resource_type='ClaimInsurance', fhir_comments=None, extension=None, id=None, modifierExtension=None, businessArra

In [107]:
# Print each field of the sample Claim resource held in `test`.
print(test.resource_type)
print(test.item[0].encounter[0].reference.replace('urn:uuid:', ''))
print(test.item[0].productOrService.coding[0].display)
print(test.item[0].productOrService.coding[0].code)
print(test.item[0].productOrService.coding[0].system)
print(test.insurance[0].coverage.display)
print(test.payee)
print(test.prescription.display)
print(test.provider.display)
print(test.total.value)
print(test.total.currency)
print(test.type.coding[0].code)
print(test.billablePeriod.start.date())
print(test.billablePeriod.end.date())
# total_seconds() counts whole days too; timedelta.seconds would silently
# drop them for a billable period longer than 24 hours.
print(int((test.billablePeriod.end - test.billablePeriod.start).total_seconds()))
print(test.patient.reference.replace('urn:uuid:', ''))

Claim
6c6d8d72-867b-5ca5-eab8-a833510701f1
diphenhydrAMINE Hydrochloride 25 MG Oral Tablet
1049630
http://www.nlm.nih.gov/research/umls/rxnorm
Medicaid
None
None
ENCOMPASS HEALTH REHAB HOSPITAL OF WESTERN MASS
137.92
USD
pharmacy
1959-05-03
1959-05-03
900
8f8b9664-6af2-a8f6-694e-1eed399ea223


In [114]:
def get_claim_entry(claim_obj):
    """Flatten one Claim resource into a single-row column dictionary.

    Only the first line item of the claim, and the first coding of that
    item's ``productOrService``, are read.

    Args:
        claim_obj: Claim-like object exposing ``patient``, ``item``,
            ``payee``, ``provider``, ``insurance``, ``total``, ``type`` and
            ``billablePeriod``.

    Returns:
        dict: Column name mapped to a one-element list. ``claim_date`` is the
        billable period start as a ``YYYY-MM-DD`` string; ``payee`` is passed
        through unchanged.
    """
    patient_uuid = claim_obj.patient.reference.replace('urn:uuid:', '')
    first_item = claim_obj.item[0]
    encounter_uuid = first_item.encounter[0].reference.replace('urn:uuid:', '')
    service = first_item.productOrService.coding[0]

    # Every value is wrapped in its own list so rows can be concatenated
    # column by column by the caller.
    return {
        'uuid': [patient_uuid],
        'entry_id': [encounter_uuid],
        'item_code': [service.code],
        'item_system': [service.system],
        'item_text': [service.display],
        'payee': [claim_obj.payee],
        'provider': [claim_obj.provider.display],
        'insurance': [claim_obj.insurance[0].coverage.display],
        'item_value': [claim_obj.total.value],
        'item_unit': [claim_obj.total.currency],
        'claim_type': [claim_obj.type.coding[0].code],
        'claim_date': [str(claim_obj.billablePeriod.start.date())],
    }

def create_claim_entries(resource_obj):
    """Collect every Claim in a resource list into column-wise lists.

    Args:
        resource_obj: Iterable of resources, each exposing ``resource_type``.

    Returns:
        dict: Column name mapped to a list with one value per Claim found,
        in the order the claims appear in ``resource_obj``. All lists are
        empty when there are no claims.
    """
    claim_entries = {
        'uuid': [],
        'entry_id': [],
        'item_code': [],
        'item_system': [],
        'item_text': [],
        'payee': [],
        'provider': [],
        'insurance': [],
        'item_value': [],
        'item_unit': [],
        'claim_type': [],
        'claim_date': [],
    }
    # Iterate the argument itself: the previous version indexed the notebook
    # global `ex_resources`, so any other bundle produced rows taken from the
    # example patient (or an IndexError/NameError).
    for entry in resource_obj:
        if entry.resource_type != 'Claim':
            continue
        for key, values in get_claim_entry(entry).items():
            claim_entries[key].extend(values)
    return claim_entries

In [116]:
# Preview the leading claim rows extracted from the example resources
# (head() with no argument shows the first five).
pd.DataFrame(create_claim_entries(ex_resources)).head()

Unnamed: 0,uuid,entry_id,item_code,item_system,item_text,payee,provider,insurance,item_value,item_unit,claim_type,claim_date
0,8f8b9664-6af2-a8f6-694e-1eed399ea223,6c6d8d72-867b-5ca5-eab8-a833510701f1,1049630,http://www.nlm.nih.gov/research/umls/rxnorm,diphenhydrAMINE Hydrochloride 25 MG Oral Tablet,,ENCOMPASS HEALTH REHAB HOSPITAL OF WESTERN MASS,Medicaid,137.92,USD,pharmacy,1959-05-03
1,8f8b9664-6af2-a8f6-694e-1eed399ea223,6c6d8d72-867b-5ca5-eab8-a833510701f1,185345009,http://snomed.info/sct,Encounter for symptom,,ENCOMPASS HEALTH REHAB HOSPITAL OF WESTERN MASS,Medicaid,85.55,USD,professional,1959-05-03
2,8f8b9664-6af2-a8f6-694e-1eed399ea223,b5ed98d8-dbd5-833e-7567-d1461e87a0b8,185347001,http://snomed.info/sct,Encounter for problem,,ENCOMPASS HEALTH REHAB HOSPITAL OF WESTERN MASS,Medicaid,85.55,USD,professional,1959-05-12
3,8f8b9664-6af2-a8f6-694e-1eed399ea223,a12ccdf4-16fb-bc58-140d-a55d7c449144,162673000,http://snomed.info/sct,General examination of patient (procedure),,"CARING HEALTH CENTER, INC",Medicaid,568.2,USD,professional,1974-05-31
4,8f8b9664-6af2-a8f6-694e-1eed399ea223,586a8ff8-44de-508d-5bc5-867327f7e277,162673000,http://snomed.info/sct,General examination of patient (procedure),,"CARING HEALTH CENTER, INC",Medicaid,778.78,USD,professional,1984-06-15


### Patient Bundle

In [160]:
class FihrRecord:
    """One patient file loaded and flattened into per-resource tables.

    The file is read once at construction; each ``*_table`` method wraps the
    corresponding extracted data in a new pandas DataFrame on every call.

    Attributes:
        fihr_file_path: The path given to the constructor.
        fihr_object: The value returned by ``read_fihr_json`` for that path.
    """

    def __init__(self, file_path):
        """Read ``file_path`` and extract all supported resource types."""
        self.fihr_file_path = file_path
        resources = read_fihr_json(file_path)
        self.fihr_object = resources

        # Extraction happens eagerly so the table accessors stay cheap.
        self.__patient = create_patient_entry(resources)
        self.__conditions = create_condition_entries(resources)
        self.__observations = create_observation_entries(resources)
        self.__medications = create_medication_entries(resources)
        self.__procedures = create_procedure_entries(resources)
        self.__claims = create_claim_entries(resources)

    def patient_table(self):
        """Return the patient data as a DataFrame built with index ``[0]``."""
        return pd.DataFrame(data=self.__patient, index=[0])

    def conditions_table(self):
        """Return the extracted conditions as a DataFrame."""
        return pd.DataFrame(data=self.__conditions)

    def observations_table(self):
        """Return the extracted observations as a DataFrame."""
        return pd.DataFrame(data=self.__observations)

    def medications_table(self):
        """Return the extracted medications as a DataFrame."""
        return pd.DataFrame(data=self.__medications)

    def procedures_table(self):
        """Return the extracted procedures as a DataFrame."""
        return pd.DataFrame(data=self.__procedures)

    def claims_table(self):
        """Return the extracted claims as a DataFrame."""
        return pd.DataFrame(data=self.__claims)

In [161]:
# Build a FihrRecord from the example patient file.
ex_fihr_class = FihrRecord(example)

In [162]:
# Show the file path stored on the record.
ex_fihr_class.fihr_file_path

'output/fhir/Aaron697_Eichmann909_8f8b9664-6af2-a8f6-694e-1eed399ea223.json'

In [163]:
# Display the patient table for the example record.
ex_fihr_class.patient_table()

Unnamed: 0,uuid,first_name,middle_name,family_name,gender,birth_date,marital_status,general_practioner,age_years,country,state,city,location_lat,location_long
0,8f8b9664-6af2-a8f6-694e-1eed399ea223,Aaron697,Don899,Eichmann909,male,1956-04-06,Never Married,,67,US,MA,East Longmeadow,42.057828,-72.456283


### Reading Multiple Records