In [1]:
!pip install pandas
!pip install ndjson
import pandas as pd




### Each file is a resource type. Each object is a resource.
Let's look at a resource to see what it includes.

In [2]:
# I want to be efficient, so I'm not parsing manually.
# Pandas doesn't appear to work well with ndjson out of the box.
# Install ndjson (I've never used this lib before, but it looks straightforward).

import ndjson

In [3]:
# Load every AllergyIntolerance resource (one JSON object per line) into `j`.
# The encoding is given explicitly so the read does not depend on the
# platform's default text encoding.
file_path = './data/AllergyIntolerance.ndjson'
with open(file_path, encoding='utf-8') as inf:
    j = ndjson.load(inf)


In [4]:
j[0]

{'resourceType': 'AllergyIntolerance',
 'id': 'e4e9794e-93a3-4528-8272-b527f653e4b9',
 'meta': {'profile': ['http://hl7.org/fhir/us/core/StructureDefinition/us-core-allergyintolerance']},
 'clinicalStatus': {'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/allergyintolerance-clinical',
    'code': 'active'}]},
 'verificationStatus': {'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/allergyintolerance-verification',
    'code': 'confirmed'}]},
 'type': 'allergy',
 'category': ['food'],
 'criticality': 'low',
 'code': {'coding': [{'system': 'http://snomed.info/sct',
    'code': '232347008',
    'display': 'Dander (animal) allergy'}],
  'text': 'Dander (animal) allergy'},
 'patient': {'reference': 'Patient/34ff2141-1565-4564-a801-18f019fa10ab'},
 'recordedDate': '2010-09-18T03:20:03-04:00'}

### Patient resource seems like a good starting point
I noticed a reference to a Patient from the first Allergy Intolerance resource. The instructions say a resource can indirectly reference the patient, by referencing a
resource that references the patient. Maybe this particular resource is an intermediate reference?

If a resource(1) references a resource(2), the referenced resource(2) could reference a patient

A resource that references another resource, that contains a patient reference, is indirectly referencing the patient

In [5]:
# Load every Patient resource (one JSON object per line) into `patients`.
# The encoding is given explicitly so the read does not depend on the
# platform's default text encoding.
file_path = './data/Patient.ndjson'
with open(file_path, encoding='utf-8') as inf:
    patients = ndjson.load(inf)

In [6]:
# For patients that carry more than one name entry, print the given names
# (a list) and family name of each entry marked as "official".
for patient in patients:
    names = patient['name']
    if len(names) > 1:
        for name in names:
            # `.get` because a name entry may omit the `use` field.
            if name.get('use') == 'official':
                print(name['given'], name['family'])

# Show the name entries of the last patient as the cell output.
names

['Treena759'] Abernathy524
['Sadye922'] Gislason620
['Rosalia943'] Yost751
['Alexis664'] Rogahn59
['Alane21'] Schamberger479
['Celesta195'] Nader710
['Yetta429'] Wuckert783
['Johnny786'] Pollich983


[{'use': 'official',
  'family': 'Pollich983',
  'given': ['Johnny786'],
  'prefix': ['Mrs.']},
 {'use': 'maiden',
  'family': 'Zulauf375',
  'given': ['Johnny786'],
  'prefix': ['Mrs.']}]

### Why are given names lists?

- all lists are 1 element
- Maybe the first name changes. Check whether any IDs have different first names.

In [7]:
# One row per (patient, name entry). The rows are collected first and the
# frame is built once: DataFrame.append was removed in pandas 2.0, and growing
# a frame row by row is quadratic.
name_records = [
    {
        'id': patient['id'],
        'use': name.get('use'),
        'given': name.get('given'),   # kept as the original list of given names
        'family': name.get('family'),
    }
    for patient in patients
    for name in patient['name']
]
df = pd.DataFrame(name_records, columns=['id', 'use', 'given', 'family'])

        

In [1]:
# Distinct patient ids present in the names table.
patient_ids = df['id'].unique()

NameError: name 'df' is not defined

### We are counting the number of resources for a given Patient
So parse the command line for `patient id (Patient.id)`, `first name (Patient.name.given)`, and `last name (Patient.name.family)`, where `Patient.name.use == 'official'`.

Import glob, get a list of ndjsons
- is the patient reference always in the same field? **no**


Patient key in root
- j[0]['patient']['reference']

Patient in nested key
- j[0]['subject']['reference']


Provenance might be useful for a shallow scan of resources or as a verification.


In [9]:
import glob
# For every resource file, inspect the first resource only and report which
# root-level key (if any) holds the patient reference.
resources = glob.glob('./data/*.ndjson')
for resource in resources:
    with open(resource, 'r', encoding='utf-8') as inf:
        records = ndjson.load(inf)

    if not records:
        # An empty file has no first resource to inspect.
        print('Empty\t', resource)
        continue

    first = records[0]
    if 'patient' in first:
        print('Patient\troot\t', resource)
    elif 'subject' in first:
        print('Subject\tnested\t', resource)
    else:
        print('None\t', resource)
        # Provenance DOES reference patients
        #     Seems like a master list of references
        # Group DOES reference patients
        #     Contains a list of patients in this "group"
        #
        # Location doesn't reference patients
        # PractitionerRole doesn't reference patients
        # Practitioner doesn't reference patients
        # Organization doesn't reference patients
        # Medication doesn't reference patients

Patient	root	 ./data/Immunization.ndjson
Patient	root	 ./data/SupplyDelivery.ndjson
Subject	nested	 ./data/ImagingStudy.ndjson
Patient	root	 ./data/ExplanationOfBenefit.ndjson
Subject	nested	 ./data/MedicationRequest.ndjson
None	 ./data/Group.ndjson
None	 ./data/Location.ndjson
None	 ./data/Provenance.ndjson
Subject	nested	 ./data/MedicationAdministration.ndjson
Subject	nested	 ./data/Observation.ndjson
None	 ./data/Patient.ndjson
Subject	nested	 ./data/CarePlan.ndjson
Subject	nested	 ./data/Encounter.ndjson
Patient	root	 ./data/Claim.ndjson
None	 ./data/PractitionerRole.ndjson
Subject	nested	 ./data/DiagnosticReport.ndjson
None	 ./data/Practitioner.ndjson
Subject	nested	 ./data/CareTeam.ndjson
None	 ./data/Organization.ndjson
Patient	root	 ./data/Device.ndjson
None	 ./data/Medication.ndjson
Subject	nested	 ./data/Condition.ndjson
Subject	nested	 ./data/Procedure.ndjson
Patient	root	 ./data/AllergyIntolerance.ndjson
Subject	nested	 ./data/DocumentReference.ndjson


### Provenance seems like it has everything I would need to complete the assignment
But since the instructions say to search through the resources, I will use a brute-force approach and search each file for direct references.

In [10]:
def load_provenance(path='./data/Provenance.ndjson'):
    """Count, per patient, the resources listed as targets of Provenance records.

    The id in the first target of each record is used as the patient id
    (presumably the first target is always the ``Patient/<id>`` reference;
    verify against the data). Every other non-Patient target adds one to that
    patient's count for the target's resource type. Counts accumulate when
    several records name the same patient. Records with no targets are skipped.

    Args:
        path: Location of the Provenance ndjson file.

    Returns:
        dict mapping patient id -> {resource type -> number of targets}.
    """
    with open(path, 'r', encoding='utf-8') as inf:
        provenance = ndjson.load(inf)

    references = {}
    for prov in provenance:
        targets = prov.get('target') or []
        if not targets:
            continue

        patient_id = targets[0]['reference'].split('/')[1]
        # setdefault keeps counts already gathered from an earlier record
        # for the same patient instead of resetting them.
        counts = references.setdefault(patient_id, {})

        for target in targets:
            resource_type = target['reference'].split('/')[0]
            if resource_type == 'Patient':
                continue
            counts[resource_type] = counts.get(resource_type, 0) + 1
    return references

In [12]:
# Per-patient resource counts from the Provenance file, as a table: one row
# per patient id (moved into the "index" column), one column per resource
# type, with 0 where a patient has no resource of that type.
references = load_provenance()
provenance = (
    pd.DataFrame.from_dict(references, orient='index')
    .reset_index()
    .fillna(0)
)

In [14]:
provenance

Unnamed: 0,index,Location,Organization,Practitioner,PractitionerRole,Encounter,Observation,Immunization,DiagnosticReport,DocumentReference,...,Condition,MedicationRequest,CareTeam,CarePlan,SupplyDelivery,Medication,MedicationAdministration,ImagingStudy,Device,AllergyIntolerance
0,b716e854-c172-4384-bc92-46a51f2dd91b,2,2,2,2,11,119,15,15,11,...,5,1.0,3,3,0.0,0.0,0.0,0.0,0.0,0.0
1,ebf9231d-6a1f-432a-90c2-bc1b340ae047,2,2,2,2,29,189,30,32,29,...,12,2.0,3,3,0.0,0.0,0.0,0.0,0.0,0.0
2,db89be6a-a4c5-4f3c-b62c-a5e1b6abe983,3,3,3,3,52,109,11,59,52,...,15,7.0,7,7,0.0,0.0,0.0,0.0,0.0,0.0
3,d8e414ef-9d1f-432a-88e6-b1bcdadeb711,2,2,2,2,93,135,11,101,93,...,14,56.0,6,6,0.0,0.0,0.0,0.0,0.0,0.0
4,421c3eaf-f95c-47af-b8cd-f6cbcb192fad,3,3,3,3,20,632,13,80,20,...,19,10.0,2,2,72.0,3.0,3.0,0.0,0.0,0.0
5,c4768f2a-f932-4ab6-a4a5-6e8ae0f9da8d,4,4,4,4,72,292,11,96,72,...,25,25.0,9,9,0.0,0.0,0.0,0.0,0.0,0.0
6,29b1e7a6-3664-4667-b9ae-2c166aa48b28,2,2,2,2,87,375,10,119,87,...,20,126.0,8,8,0.0,0.0,0.0,1.0,0.0,0.0
7,2116b133-bab9-483e-8afb-e90d6d875962,2,2,2,2,12,75,7,19,12,...,7,0.0,5,5,0.0,0.0,0.0,0.0,0.0,0.0
8,d13874ec-22ea-46ed-a55c-1fd75ef56a58,2,2,2,2,8,93,11,12,8,...,8,0.0,2,2,0.0,0.0,0.0,0.0,0.0,0.0
9,ae7f3b20-ac42-423b-9386-884d14a5cfc5,2,2,2,2,65,1187,12,198,65,...,19,66.0,5,5,131.0,8.0,8.0,0.0,2.0,0.0


In [20]:
import ndjson
import glob

class Patient:
    """A patient plus counts of the resources that reference them.

    Constructing an instance resolves the patient (by id, or by first and
    last name) against ``Patient.ndjson`` and then fills ``self.references``
    with ``{resource type: count}``:

    * every resource in any ``*.ndjson`` file whose root-level ``patient`` or
      ``subject`` reference points at the patient counts once under its own
      ``resourceType``;
    * every Encounter of the patient additionally counts once each under
      ``Practitioner``, ``Location`` and ``Organization`` when it carries the
      corresponding reference (first participant, first location, service
      provider). These are counts of referencing encounters, not of distinct
      practitioners/locations/organizations.

    Attributes:
        data_dir: Class-level directory that holds the ``*.ndjson`` files.
            Override it on the class (or a subclass) to read from elsewhere.
    """

    data_dir = './data'

    def __init__(self, patient_id=None, first_name=None, last_name=None):
        self.first_name = first_name
        self.last_name = last_name
        self.patient_id = patient_id

        # Look up the patient id, or the first and last name
        self.lookup_patient()

        # Look up direct references
        self.lookup_references()

        # Look up references made through the patient's encounters
        self.look_encounters()

    # -- helpers ---------------------------------------------------------

    def _load(self, filename):
        """Return the list of resources stored in ``data_dir/filename``."""
        with open(f'{self.data_dir}/{filename}', 'r', encoding='utf-8') as inf:
            return ndjson.load(inf)

    def _count(self, resource_type):
        """Add one to the running count for ``resource_type``."""
        self.references[resource_type] = self.references.get(resource_type, 0) + 1

    @staticmethod
    def _reference_id(reference):
        """Return the id part of a ``Type/id`` reference string, else None."""
        if not reference:
            return None
        parts = reference.split('/')
        return parts[1] if len(parts) > 1 else None

    @staticmethod
    def _official_name(patient):
        """Return the name entry with use == 'official', else the first entry, else None."""
        names = patient.get('name') or []
        for name in names:
            if name.get('use') == 'official':
                return name
        return names[0] if names else None

    # -- loading ---------------------------------------------------------

    def load_patients(self):
        """Loads Patient file"""
        return self._load('Patient.ndjson')

    def load_resources(self):
        """Loads resource filepaths"""
        return glob.glob(f'{self.data_dir}/*.ndjson')

    # -- lookups ---------------------------------------------------------

    def lookup_patient(self):
        """Look up the patient by id, or by first and last name.

        With an id, ``first_name``/``last_name`` are filled from the patient's
        official name. Without an id, the first patient whose official name
        matches ``first_name`` and ``last_name`` supplies ``patient_id``. If
        neither an id nor both names were given, a hint is printed and nothing
        is looked up.
        """
        patients = self.load_patients()

        if self.patient_id is not None:
            for patient in patients:
                if patient.get('id') != self.patient_id:
                    continue
                name = self._official_name(patient)
                if name is not None:
                    self.first_name = (name.get('given') or [None])[0]
                    self.last_name = name.get('family')
                break
            return

        if self.first_name is None or self.last_name is None:
            print('You need to provide a patient id, or first name and last name')
            return

        for patient in patients:
            name = self._official_name(patient)
            if name is None:
                continue
            given = (name.get('given') or [None])[0]
            if given == self.first_name and name.get('family') == self.last_name:
                self.patient_id = patient['id']
                break

    def lookup_references(self):
        """Count, per resource type, the resources that directly reference the patient.

        A resource counts when its root-level ``patient`` reference (or, if it
        has no ``patient`` key, its ``subject`` reference) carries this
        patient's id. Resets ``self.references`` before counting.
        """
        self.references = {}
        if self.patient_id is None:
            # Nothing can match, so skip reading every file.
            return

        for resource_path in self.load_resources():
            with open(resource_path, 'r', encoding='utf-8') as inf:
                resources = ndjson.load(inf)

            for resource in resources:
                if 'patient' in resource:
                    holder = resource['patient']
                elif 'subject' in resource:
                    holder = resource['subject']
                else:
                    continue
                if self._reference_id(holder.get('reference')) == self.patient_id:
                    self._count(resource['resourceType'])

    def look_encounters(self):
        """Count practitioner, location and organization references on the patient's encounters.

        For each encounter whose subject is this patient, adds one to
        ``Practitioner`` / ``Location`` / ``Organization`` when the encounter
        has a first-participant, first-location or service-provider reference
        respectively. Encounters lacking one of these are simply not counted
        for that type.
        """
        if self.patient_id is None:
            return

        for encounter in self._load('Encounter.ndjson'):
            subject = encounter.get('subject') or {}
            if self._reference_id(subject.get('reference')) != self.patient_id:
                continue

            participants = encounter.get('participant') or [{}]
            if (participants[0].get('individual') or {}).get('reference'):
                self._count('Practitioner')

            locations = encounter.get('location') or [{}]
            if (locations[0].get('location') or {}).get('reference'):
                self._count('Location')

            if (encounter.get('serviceProvider') or {}).get('reference'):
                self._count('Organization')

    # -- reporting -------------------------------------------------------

    def print_report(self):
        """Print the patient's name, id and reference counts, highest count first."""
        print("Patient Name:\t", self.first_name, self.last_name)
        print("Patient ID:\t", self.patient_id)
        print("\n")
        print(f"{'RESOURCE_TYPE':25}{'COUNT':<25}")
        print(f"{'-'*30}")
        sorted_references = sorted(self.references.items(), key=lambda x: x[1], reverse=True)
        for resource_type, count in sorted_references:
            print(f'{resource_type:25} {count:<25}')

In [21]:
# Look up by patient id; the name is filled in from Patient.ndjson.
pat1 = Patient('ebf9231d-6a1f-432a-90c2-bc1b340ae047')
# Look up by first and last name; the patient id is filled in.
pat2 = Patient(first_name='Rosamond509', last_name='Lynch190')
# No id and no name: this prints the "You need to provide..." hint.
pat3 = Patient()

You need to provide a patient id, or first name and last name


In [22]:
pat1.__dict__

{'first_name': 'Mohamed943',
 'last_name': 'Treutel973',
 'patient_id': 'ebf9231d-6a1f-432a-90c2-bc1b340ae047',
 'references': {'Immunization': 30,
  'ExplanationOfBenefit': 29,
  'MedicationRequest': 2,
  'Observation': 189,
  'CarePlan': 3,
  'Encounter': 29,
  'Claim': 31,
  'DiagnosticReport': 32,
  'CareTeam': 3,
  'Condition': 12,
  'Procedure': 15,
  'DocumentReference': 29,
  'Practitioner': 29,
  'Location': 29,
  'Organization': 29}}

In [23]:
pat2.__dict__

{'first_name': 'Rosamond509',
 'last_name': 'Lynch190',
 'patient_id': '34ff2141-1565-4564-a801-18f019fa10ab',
 'references': {'Immunization': 20,
  'ExplanationOfBenefit': 20,
  'MedicationRequest': 5,
  'Observation': 180,
  'CarePlan': 3,
  'Encounter': 20,
  'Claim': 25,
  'DiagnosticReport': 24,
  'CareTeam': 3,
  'Condition': 12,
  'Procedure': 9,
  'AllergyIntolerance': 2,
  'DocumentReference': 20,
  'Practitioner': 20,
  'Location': 20,
  'Organization': 20}}

In [24]:
print(pat1.first_name, pat1.last_name, pat1.patient_id)

Mohamed943 Treutel973 ebf9231d-6a1f-432a-90c2-bc1b340ae047


In [25]:
pat1.references

{'Immunization': 30,
 'ExplanationOfBenefit': 29,
 'MedicationRequest': 2,
 'Observation': 189,
 'CarePlan': 3,
 'Encounter': 29,
 'Claim': 31,
 'DiagnosticReport': 32,
 'CareTeam': 3,
 'Condition': 12,
 'Procedure': 15,
 'DocumentReference': 29,
 'Practitioner': 29,
 'Location': 29,
 'Organization': 29}

In [26]:
pat2.references

{'Immunization': 20,
 'ExplanationOfBenefit': 20,
 'MedicationRequest': 5,
 'Observation': 180,
 'CarePlan': 3,
 'Encounter': 20,
 'Claim': 25,
 'DiagnosticReport': 24,
 'CareTeam': 3,
 'Condition': 12,
 'Procedure': 9,
 'AllergyIntolerance': 2,
 'DocumentReference': 20,
 'Practitioner': 20,
 'Location': 20,
 'Organization': 20}

At this point I see that there are probably some indirect references that are missing

- Location
- Organization
- Practitioner
- PractitionerRole
- Provenance

Provenance seems to be the only thing that includes all references. But I'm not sure why a Patient resource would be referenced by Practitioner, PractitionerRole, Organization, etc. 

Encounter seems to reference Patient, Practitioner, PractitionerRole, Location, etc., but it's still unclear why you would include those in the report.

In [27]:
references = load_provenance()
references['d13874ec-22ea-46ed-a55c-1fd75ef56a58']

{'Location': 2,
 'Organization': 2,
 'Practitioner': 2,
 'PractitionerRole': 2,
 'Encounter': 8,
 'Observation': 93,
 'Procedure': 5,
 'Immunization': 11,
 'DiagnosticReport': 12,
 'DocumentReference': 8,
 'Claim': 8,
 'ExplanationOfBenefit': 8,
 'Condition': 8,
 'CareTeam': 2,
 'CarePlan': 2}

In [29]:
patient_id = 'd13874ec-22ea-46ed-a55c-1fd75ef56a58'

references = {}

def look_enctouners(self):
    """Scratch encounter scan that works on the notebook-level globals.

    Reads ``./data/Encounter.ndjson`` and, for every encounter whose subject
    id equals the module-level ``patient_id``, prints a debug marker and the
    resource type, then adds one to the ``Practitioner``, ``Location`` and
    ``Organization`` entries of the module-level ``references`` dict.
    ``self`` is accepted but never used. Returns nothing.
    """
    def bump(kind):
        # Increment the shared counter, starting from zero on first sight.
        references[kind] = references.get(kind, 0) + 1

    with open('./data/Encounter.ndjson', 'r') as handle:
        encounters = ndjson.load(handle)

    for encounter in encounters:
        subject_id = encounter['subject']['reference'].split('/')[1]
        if subject_id != patient_id:
            continue

        print("cool")
        print(encounter['resourceType'])

        # Each lookup below is deliberate: an encounter that lacks the
        # reference raises here instead of being counted.
        _practitioner = encounter['participant'][0]['individual']['reference']
        bump('Practitioner')

        _location = encounter['location'][0]['location']['reference']
        bump('Location')

        _organization = encounter['serviceProvider']['reference']
        bump('Organization')



### Encounters does not include all of the other references. There are probably even more references in CarePlan and CareTeam.