In [94]:
import numpy as np  
import pandas as pd 
import json
from datetime import date
from tqdm.auto import tqdm     #used for creating progress meters or bars
tqdm.pandas()

In [95]:
from fhir.resources.patient import Patient
from fhir.resources.condition import Condition
from fhir.resources.medication import Medication
from fhir.resources.claim import Claim


In [96]:
import os
filesList = os.listdir('/Project inco/fhir/')
print(len(filesList))

1181


In [97]:
PATIENT = pd.DataFrame(columns=['PatientUID', 'NameFamily', 'NameGiven', 'DoB', 'Gender'])
CONDITION = pd.DataFrame(columns=['ConditionText', 'ConditionOnsetDates', 'PatientUID'])
MEDICATION = pd.DataFrame(columns=['MedicationText', 'MedicationDates', 'PatientUID'])
CLAIM = pd.DataFrame(columns=['ClaimProvider', 'ClaimInsurance', 'ClaimDate', 'ClaimType','ClaimItem', 
                              'ClaimUSD', 'PatientUID'])

In [98]:
#ONE bundle = ONE patient

f = open('/Project inco/fhir/'+filesList[50],)
json_obj = json.load(f)

oneBundle = Bundle.parse_obj(json_obj)

# Resources

resources = []
if oneBundle.entry is not None:
    for entry in oneBundle.entry:
        resources.append(entry.resource)


oneResources = []
for j in range(len(resources)):
    oneResources.append(type(resources[j]))
    
print(len(oneResources))

uniqResources = set(oneResources)
print(len(uniqResources))
uniqResources

243
16


{fhir.resources.allergyintolerance.AllergyIntolerance,
 fhir.resources.careplan.CarePlan,
 fhir.resources.careteam.CareTeam,
 fhir.resources.claim.Claim,
 fhir.resources.condition.Condition,
 fhir.resources.diagnosticreport.DiagnosticReport,
 fhir.resources.encounter.Encounter,
 fhir.resources.explanationofbenefit.ExplanationOfBenefit,
 fhir.resources.imagingstudy.ImagingStudy,
 fhir.resources.immunization.Immunization,
 fhir.resources.medicationrequest.MedicationRequest,
 fhir.resources.observation.Observation,
 fhir.resources.organization.Organization,
 fhir.resources.patient.Patient,
 fhir.resources.practitioner.Practitioner,
 fhir.resources.procedure.Procedure}

In [100]:
print(type(resources[0]))

<class 'fhir.resources.patient.Patient'>


In [101]:
onePatient = Patient.parse_obj(resources[0])
onePatient.name[0]

# Patient demographics
onePatientID = onePatient.id

print(onePatientID)
print(onePatient.name[0].family)
print(onePatient.name[0].given[0])
print(onePatient.birthDate)
print(onePatient.gender)

207ab280-2d58-408e-8301-6444e23c4a8f
Maya972
Andrea7
2008-03-02
female


In [102]:
# Find Condition resources 

resCondition = []
for j in range(len(resources)):
    if resources[j].__class__.__name__ == 'Condition':
        resCondition.append(resources[j])
        
conditions = []
conditionOnsetDates = []
for j in range(len(resCondition)):
    oneCondition = Condition.parse_obj(resCondition[j])
    conditions.append(oneCondition.code.text)
    conditionOnsetDates.append(str(oneCondition.onsetDateTime.date()))  
    
onePatConditions = pd.DataFrame()

onePatConditions['ConditionText'] = conditions
onePatConditions['ConditionOnsetDates'] = conditionOnsetDates
onePatConditions['PatientUID'] = onePatientID

print(onePatConditions.shape)
onePatConditions.sample(1)

(7, 3)


Unnamed: 0,ConditionText,ConditionOnsetDates,PatientUID
6,Chronic sinusitis (disorder),2018-05-05,207ab280-2d58-408e-8301-6444e23c4a8f


In [104]:
# Find Claim resources 

resClaims = []
for j in range(len(resources)):
    if resources[j].__class__.__name__ == 'Claim':
        resClaims.append(resources[j])
        
claimProvider = []
claimInsurance = []
claimDate = []
claimType = []
claimItem = []
claimUSD = []

for j in range(len(resClaims)):
    oneClaim = Claim.parse_obj(resClaims[j])
    # Inner loop over claim items:
    for i in range(len(resClaims[j].item)):
        claimProvider.append(oneClaim.provider.display)
        claimInsurance.append(oneClaim.insurance[0].coverage.display)
        claimDate.append(str(oneClaim.billablePeriod.start.date()))
        claimType.append(oneClaim.type.coding[0].code)
        claimItem.append(resClaims[j].item[i].productOrService.text)
        if resClaims[j].item[i].net:
            claimUSD.append(str(resClaims[j].item[i].net.value))
        else:
            claimUSD.append('None')
    
onePatClaims = pd.DataFrame()

onePatClaims['ClaimProvider'] = claimProvider
onePatClaims['ClaimInsurance'] = claimInsurance
onePatClaims['ClaimDate'] = claimDate
onePatClaims['ClaimType'] = claimType
onePatClaims['ClaimItem'] = claimItem
onePatClaims['ClaimUSD'] = claimUSD
onePatClaims['PatientUID'] = onePatientID

print(onePatClaims.shape)
onePatClaims.sample(1)

(67, 7)


Unnamed: 0,ClaimProvider,ClaimInsurance,ClaimDate,ClaimType,ClaimItem,ClaimUSD,PatientUID
8,PCP9535,Aetna,2010-08-08,institutional,Pneumococcal conjugate PCV 13,140.52,207ab280-2d58-408e-8301-6444e23c4a8f


In [106]:
# Find MedicationRequest resources 

resMedicationRequest = []
for j in range(len(resources)):
    if resources[j].__class__.__name__ == 'MedicationRequest':
        resMedicationRequest.append(resources[j])
        
meds = []
medsDates = []
for j in range(len(resMedicationRequest)):
    oneMed = MedicationRequest.parse_obj(resMedicationRequest[j])
    meds.append(oneMed.medicationCodeableConcept.text)
    medsDates.append(str(oneMed.authoredOn.date()))  
    
onePatMeds = pd.DataFrame()

onePatMeds['MedicationText'] = meds
onePatMeds['MedicationDates'] = medsDates
onePatMeds['PatientUID'] = onePatientID

print(onePatMeds.shape)
onePatMeds.sample(1)

(6, 3)


Unnamed: 0,MedicationText,MedicationDates,PatientUID
1,NDA020800 0.3 ML Epinephrine 1 MG/ML Auto-Inje...,2009-06-13,207ab280-2d58-408e-8301-6444e23c4a8f


In [108]:
    # Find Claim resources ########################################
    resClaims = []
    for j in range(len(resources)):
        if resources[j].__class__.__name__ == 'Claim':
            resClaims.append(resources[j])

    claimProvider = []
    claimInsurance = []
    claimDate = []
    claimType = []
    claimItem = []
    claimUSD = []

    for j in range(len(resClaims)):
        oneClaim = Claim.parse_obj(resClaims[j])
        # Inner loop over claim items:
        for i in range(len(resClaims[j].item)):
            claimProvider.append(oneClaim.provider.display)
            claimInsurance.append(oneClaim.insurance[0].coverage.display)
            claimDate.append(str(oneClaim.billablePeriod.start.date()))
            claimType.append(oneClaim.type.coding[0].code)
            claimItem.append(resClaims[j].item[i].productOrService.text)
            if resClaims[j].item[i].net:
                claimUSD.append(str(resClaims[j].item[i].net.value))
            else:
                claimUSD.append('None')

    onePatClaims = pd.DataFrame()

    onePatClaims['ClaimProvider'] = claimProvider
    onePatClaims['ClaimInsurance'] = claimInsurance
    onePatClaims['ClaimDate'] = claimDate
    onePatClaims['ClaimType'] = claimType
    onePatClaims['ClaimItem'] = claimItem
    onePatClaims['ClaimUSD'] = claimUSD
    onePatClaims['PatientUID'] = onePatientID
    
    CLAIM = pd.concat([CLAIM, onePatClaims], ignore_index = True, axis=0)
    CLAIM.reset_index()

Unnamed: 0,index,ClaimProvider,ClaimInsurance,ClaimDate,ClaimType,ClaimItem,ClaimUSD,PatientUID
0,0,HALLMARK HEALTH SYSTEM,Aetna,2009-05-26,institutional,Encounter for problem,,207ab280-2d58-408e-8301-6444e23c4a8f
1,1,HALLMARK HEALTH SYSTEM,Aetna,2009-06-13,pharmacy,Encounter for problem,,207ab280-2d58-408e-8301-6444e23c4a8f
2,2,HALLMARK HEALTH SYSTEM,Aetna,2009-06-13,pharmacy,Encounter for problem,,207ab280-2d58-408e-8301-6444e23c4a8f
3,3,HALLMARK HEALTH SYSTEM,Aetna,2009-06-13,institutional,Encounter for problem,,207ab280-2d58-408e-8301-6444e23c4a8f
4,4,PCP9535,Aetna,2010-02-07,institutional,Well child visit (procedure),,207ab280-2d58-408e-8301-6444e23c4a8f
...,...,...,...,...,...,...,...,...
129,129,PCP9535,Aetna,2019-03-17,institutional,Tdap,140.52,207ab280-2d58-408e-8301-6444e23c4a8f
130,130,PCP9535,Aetna,2019-03-17,institutional,"Influenza, seasonal, injectable, preservative ...",140.52,207ab280-2d58-408e-8301-6444e23c4a8f
131,131,PCP9535,Aetna,2019-03-17,institutional,"HPV, quadrivalent",140.52,207ab280-2d58-408e-8301-6444e23c4a8f
132,132,PCP9535,Aetna,2019-03-17,institutional,meningococcal MCV4P,140.52,207ab280-2d58-408e-8301-6444e23c4a8f


In [120]:
for fileNum in tqdm(range(len(filesList))):
    #for fileNum in range(100):
    f = open('/Project inco/fhir/'+filesList[fileNum],)
    json_obj = json.load(f)

    oneBundle = Bundle.parse_obj(json_obj)

    # Resources 
    resources = []
    if oneBundle.entry is not None:
        for entry in oneBundle.entry:
            resources.append(entry.resource)
    
    onePatient = Patient.parse_obj(resources[0])

    # Patient demographics ########################################
    onePatientID = onePatient.id

    PATIENT.loc[len(PATIENT.index)] = [onePatientID, onePatient.name[0].family, 
                                       onePatient.name[0].given[0], onePatient.birthDate, onePatient.gender] 

  0%|          | 0/1181 [00:00<?, ?it/s]

UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 1868: character maps to <undefined>