### Load Packages

In [1]:
import boto3
import pandas as pd 
import json 
import configparser 
import ndjson
import re 

### Configuration

For security i have made a configuration file that contains my AWS access key and secret key

In [2]:
config=configparser.ConfigParser() 
config.read_file(open('config.cfg'))

KEY = config.get('AWS', 'KEY')
SECRET = config.get('AWS', 'SECRET')

### Connect to the S3 Service

In [3]:
s3 = boto3.resource('s3', 
    region_name = 'us-west-2', 
    aws_access_key_id = KEY, 
    aws_secret_access_key = SECRET
)

### Review the Contents of Bucket

After connecting the service and accessing the bucket lets review the contents of the bucket.  

In [4]:
bucket = s3.Bucket('1up-coding-challenge-patients')

for obj in bucket.objects.all():
    print(obj)

s3.ObjectSummary(bucket_name='1up-coding-challenge-patients', key='AllergyIntolerance.ndjson')
s3.ObjectSummary(bucket_name='1up-coding-challenge-patients', key='CarePlan.ndjson')
s3.ObjectSummary(bucket_name='1up-coding-challenge-patients', key='CareTeam.ndjson')
s3.ObjectSummary(bucket_name='1up-coding-challenge-patients', key='Claim.ndjson')
s3.ObjectSummary(bucket_name='1up-coding-challenge-patients', key='Condition.ndjson')
s3.ObjectSummary(bucket_name='1up-coding-challenge-patients', key='Device.ndjson')
s3.ObjectSummary(bucket_name='1up-coding-challenge-patients', key='DiagnosticReport.ndjson')
s3.ObjectSummary(bucket_name='1up-coding-challenge-patients', key='DocumentReference.ndjson')
s3.ObjectSummary(bucket_name='1up-coding-challenge-patients', key='Encounter.ndjson')
s3.ObjectSummary(bucket_name='1up-coding-challenge-patients', key='ExplanationOfBenefit.ndjson')
s3.ObjectSummary(bucket_name='1up-coding-challenge-patients', key='Group.ndjson')
s3.ObjectSummary(bucket_name='1u

In [112]:
pat = s3.Object('1up-coding-challenge-patients', 'ExplanationOfBenefit.ndjson')
pat1 = pat.get()['Body'].read()
pat2 = ndjson.loads(pat1.decode('utf-8'))

In [113]:
pat1 = pat.get()['Body'].read()

In [114]:
pat2 = ndjson.loads(pat1.decode('utf-8'))

In [162]:
for i in pat2[1]['item'][0]['productOrService']['coding'][0]: 
    if i == 'code': 
        print(pat2[1]['item'][0]['productOrService']['coding'][0][i])

424441002


In [205]:
from io import BytesIO
import json

obj = s3.Object('1up-coding-challenge-patients', 'Patient.ndjson')
with BytesIO(obj.get()['Body'].read()) as bio: 
    data = bio.readlines()
    data = [json.loads(line) for line in data]


In [453]:
df = pd.json_normalize(data, max_level=2)

In [450]:
for r in df.telecom: 
    df['phonenumber'] = r['value']
    df['type'] = r['system']

In [678]:
[r['address'][0]['extension'][0]['extension'][1] for r in data]

[{'url': 'longitude', 'valueDecimal': -71.24234884403893},
 {'url': 'longitude', 'valueDecimal': -70.66095565333276},
 {'url': 'longitude', 'valueDecimal': -70.89077716097712},
 {'url': 'longitude', 'valueDecimal': -72.13463118195973},
 {'url': 'longitude', 'valueDecimal': -71.20155820047266},
 {'url': 'longitude', 'valueDecimal': -71.06409980842905},
 {'url': 'longitude', 'valueDecimal': -70.9222678474517},
 {'url': 'longitude', 'valueDecimal': -72.72526515453637},
 {'url': 'longitude', 'valueDecimal': -71.01882431610733},
 {'url': 'longitude', 'valueDecimal': -71.13420161568726},
 {'url': 'longitude', 'valueDecimal': -70.88317245487545},
 {'url': 'longitude', 'valueDecimal': -71.08590772127563},
 {'url': 'longitude', 'valueDecimal': -71.19501270210624},
 {'url': 'longitude', 'valueDecimal': -70.31588552264081},
 {'url': 'longitude', 'valueDecimal': -71.8853768531197},
 {'url': 'longitude', 'valueDecimal': -70.93266938466046},
 {'url': 'longitude', 'valueDecimal': -71.08427259784995},

In [623]:


for i in data[0]: 
    print(i)
   


resourceType
id
meta
text
extension
identifier
name
telecom
gender
birthDate
address
maritalStatus
multipleBirthBoolean
communication


In [654]:
d = {} 
d['id'] = [r['id'] for r in data]
d['resource'] = [r['resourceType'] for r in data]
d['language'] = [r['communication'][0]['language']['text'] for r in data]
d['lastname'] =  [r['name'][0]['family'] for r in data]
d['firstname'] =  [r['name'][0]['given'][0] for r in data]
d['address1'] = [r['address'][0]['line'][0] for r in data]
d['city'] = [r['address'][0]['city'] for r in data]
d['state'] = [r['address'][0]['state'] for r in data]


In [655]:
d

{'id': ['c4768f2a-f932-4ab6-a4a5-6e8ae0f9da8d',
  'ebf9231d-6a1f-432a-90c2-bc1b340ae047',
  'db89be6a-a4c5-4f3c-b62c-a5e1b6abe983',
  'ae7f3b20-ac42-423b-9386-884d14a5cfc5',
  '421c3eaf-f95c-47af-b8cd-f6cbcb192fad',
  'd8e414ef-9d1f-432a-88e6-b1bcdadeb711',
  'b716e854-c172-4384-bc92-46a51f2dd91b',
  '29b1e7a6-3664-4667-b9ae-2c166aa48b28',
  '2116b133-bab9-483e-8afb-e90d6d875962',
  'd13874ec-22ea-46ed-a55c-1fd75ef56a58',
  '6bc1f207-dac7-4167-9672-60bc3659e3e5',
  'd9365a63-86fb-4914-b3ec-8b43716b5767',
  '1392b4ce-78ba-4bda-a8ff-d5fe7e04f558',
  '34ff2141-1565-4564-a801-18f019fa10ab',
  '70c8e3d4-e3ac-4db0-a8eb-74df565283a4',
  '75ba3372-0b0c-4fa4-b73c-ee8dce1d6872',
  'c223cc3e-7ac7-4a7b-b3a1-04444e6303bf',
  '37f771b2-ab08-4026-8bf8-c6bad0e77f92',
  'd99eef72-f99a-4494-9706-818b4e8c0919',
  'a542b75f-2c04-4688-92d4-02175abb80b6',
  '31c49a7a-1f98-455f-ad2a-5d4ccc5f3193',
  'b6661250-29bc-4707-b491-cb6c50aba943',
  '066206be-6741-41f5-b4c6-3b83c443d158',
  'a8bbd3cf-0693-4537-bb14-b

In [634]:
[r['name'][0]['family'] for r in data]

['Abernathy524',
 'Treutel973',
 'Gislason620',
 'Maggio310',
 'Greenholt190',
 'Yost751',
 'Pfannerstill264',
 'Glover433',
 'Johnson679',
 'Bode78',
 'Denesik803',
 'Feil794',
 'Kreiger457',
 'Lynch190',
 'Nienow652',
 'Rogahn59',
 'Dooley940',
 'Braun514',
 'Connelly992',
 'Schamberger479',
 'Dickens475',
 'Franecki195',
 'Corkery305',
 'Rice937',
 'Nader710',
 'Hegmann834',
 'Wuckert783',
 'Pollich983']

In [451]:
df['language'] = [r['language']['text'] for r in df.communication]

In [520]:
d = {}
for i in df.columns:
    if i == 'id': 
        d['id'] =  [r for r in df[i]]

    elif i == 'communication': 
        d['language'] = [r[0]['language']['text'] for r in df[i]]
    
    elif i == 'telecom': 
        d['phone'] = [r[0]['value'] for r in df[i]]
        d['type'] = [r[0]['system'] for r in df[i]]
    
    elif i  == 'name':
        d['lastName']=[r[0]['family'] for r in df[i]]
        d['firstname']=[r[0]['given'][0] for r in df[i]]



In [521]:
d
        

{'id': ['c4768f2a-f932-4ab6-a4a5-6e8ae0f9da8d',
  'ebf9231d-6a1f-432a-90c2-bc1b340ae047',
  'db89be6a-a4c5-4f3c-b62c-a5e1b6abe983',
  'ae7f3b20-ac42-423b-9386-884d14a5cfc5',
  '421c3eaf-f95c-47af-b8cd-f6cbcb192fad',
  'd8e414ef-9d1f-432a-88e6-b1bcdadeb711',
  'b716e854-c172-4384-bc92-46a51f2dd91b',
  '29b1e7a6-3664-4667-b9ae-2c166aa48b28',
  '2116b133-bab9-483e-8afb-e90d6d875962',
  'd13874ec-22ea-46ed-a55c-1fd75ef56a58',
  '6bc1f207-dac7-4167-9672-60bc3659e3e5',
  'd9365a63-86fb-4914-b3ec-8b43716b5767',
  '1392b4ce-78ba-4bda-a8ff-d5fe7e04f558',
  '34ff2141-1565-4564-a801-18f019fa10ab',
  '70c8e3d4-e3ac-4db0-a8eb-74df565283a4',
  '75ba3372-0b0c-4fa4-b73c-ee8dce1d6872',
  'c223cc3e-7ac7-4a7b-b3a1-04444e6303bf',
  '37f771b2-ab08-4026-8bf8-c6bad0e77f92',
  'd99eef72-f99a-4494-9706-818b4e8c0919',
  'a542b75f-2c04-4688-92d4-02175abb80b6',
  '31c49a7a-1f98-455f-ad2a-5d4ccc5f3193',
  'b6661250-29bc-4707-b491-cb6c50aba943',
  '066206be-6741-41f5-b4c6-3b83c443d158',
  'a8bbd3cf-0693-4537-bb14-b

In [522]:
pd.DataFrame(d)


Unnamed: 0,id,lastName,firstname,phone,type,language
0,c4768f2a-f932-4ab6-a4a5-6e8ae0f9da8d,Abernathy524,Treena759,555-169-2844,phone,English
1,ebf9231d-6a1f-432a-90c2-bc1b340ae047,Treutel973,Mohamed943,555-523-4673,phone,Russian (Russia)
2,db89be6a-a4c5-4f3c-b62c-a5e1b6abe983,Gislason620,Sadye922,555-845-8224,phone,English
3,ae7f3b20-ac42-423b-9386-884d14a5cfc5,Maggio310,Dot905,555-658-8363,phone,English
4,421c3eaf-f95c-47af-b8cd-f6cbcb192fad,Greenholt190,Wava789,555-815-1331,phone,English
5,d8e414ef-9d1f-432a-88e6-b1bcdadeb711,Yost751,Rosalia943,555-222-8384,phone,English
6,b716e854-c172-4384-bc92-46a51f2dd91b,Pfannerstill264,Clemente531,555-910-5598,phone,English
7,29b1e7a6-3664-4667-b9ae-2c166aa48b28,Glover433,Alan320,555-512-5627,phone,English
8,2116b133-bab9-483e-8afb-e90d6d875962,Johnson679,Judson999,555-883-3861,phone,English
9,d13874ec-22ea-46ed-a55c-1fd75ef56a58,Bode78,Cleo27,555-703-6424,phone,English


In [583]:
data[0]['address'][0]['line'][0]

'415 Davis Neck Unit 94'

In [192]:
df.head()

Unnamed: 0,resourceType,id,meta,text,extension,identifier,name,telecom,gender,birthDate,address,maritalStatus,multipleBirthBoolean,communication,deceasedDateTime
0,Patient,c4768f2a-f932-4ab6-a4a5-6e8ae0f9da8d,{'profile': ['http://standardhealthrecord.org/...,"{'status': 'generated', 'div': '<div xmlns=""ht...",[{'url': 'http://hl7.org/fhir/us/core/Structur...,[{'system': 'https://github.com/synthetichealt...,"[{'use': 'official', 'family': 'Abernathy524',...","[{'system': 'phone', 'value': '555-169-2844', ...",female,1988-01-05,[{'extension': [{'url': 'http://hl7.org/fhir/S...,{'coding': [{'system': 'http://terminology.hl7...,False,[{'language': {'coding': [{'system': 'urn:ietf...,
1,Patient,ebf9231d-6a1f-432a-90c2-bc1b340ae047,{'profile': ['http://standardhealthrecord.org/...,"{'status': 'generated', 'div': '<div xmlns=""ht...",[{'url': 'http://hl7.org/fhir/us/core/Structur...,[{'system': 'https://github.com/synthetichealt...,"[{'use': 'official', 'family': 'Treutel973', '...","[{'system': 'phone', 'value': '555-523-4673', ...",male,2010-03-04,[{'extension': [{'url': 'http://hl7.org/fhir/S...,{'coding': [{'system': 'http://terminology.hl7...,False,[{'language': {'coding': [{'system': 'urn:ietf...,
2,Patient,db89be6a-a4c5-4f3c-b62c-a5e1b6abe983,{'profile': ['http://standardhealthrecord.org/...,"{'status': 'generated', 'div': '<div xmlns=""ht...",[{'url': 'http://hl7.org/fhir/us/core/Structur...,[{'system': 'https://github.com/synthetichealt...,"[{'use': 'official', 'family': 'Gislason620', ...","[{'system': 'phone', 'value': '555-845-8224', ...",female,1977-06-01,[{'extension': [{'url': 'http://hl7.org/fhir/S...,{'coding': [{'system': 'http://terminology.hl7...,False,[{'language': {'coding': [{'system': 'urn:ietf...,
3,Patient,ae7f3b20-ac42-423b-9386-884d14a5cfc5,{'profile': ['http://standardhealthrecord.org/...,"{'status': 'generated', 'div': '<div xmlns=""ht...",[{'url': 'http://hl7.org/fhir/us/core/Structur...,[{'system': 'https://github.com/synthetichealt...,"[{'use': 'official', 'family': 'Maggio310', 'g...","[{'system': 'phone', 'value': '555-658-8363', ...",female,1953-12-30,[{'extension': [{'url': 'http://hl7.org/fhir/S...,{'coding': [{'system': 'http://terminology.hl7...,False,[{'language': {'coding': [{'system': 'urn:ietf...,2020-03-30T15:42:49-04:00
4,Patient,421c3eaf-f95c-47af-b8cd-f6cbcb192fad,{'profile': ['http://standardhealthrecord.org/...,"{'status': 'generated', 'div': '<div xmlns=""ht...",[{'url': 'http://hl7.org/fhir/us/core/Structur...,[{'system': 'https://github.com/synthetichealt...,"[{'use': 'official', 'family': 'Greenholt190',...","[{'system': 'phone', 'value': '555-815-1331', ...",female,1995-06-13,[{'extension': [{'url': 'http://hl7.org/fhir/S...,{'coding': [{'system': 'http://terminology.hl7...,False,[{'language': {'coding': [{'system': 'urn:ietf...,


In [187]:
from io import StringIO
a = pat2[0]
h = json.dumps(a)
l = StringIO(h)

pd.read_json(l)

ValueError: arrays must all be same length

In [160]:
pat2[1]['item'][0]['productOrService']['coding'][0]['code']

'424441002'

In [150]:
print()




In [42]:
result = []
for name in pat2:
    dict = {} 
    dict['id'] = name['id']
    dict['allergy'] = name['code']['text']
    result.append(dict)

In [46]:
result

[{'id': 'e4e9794e-93a3-4528-8272-b527f653e4b9',
  'allergy': 'Dander (animal) allergy'},
 {'id': 'c522f480-a592-46b2-8031-8cf8448e9afd', 'allergy': 'Allergy to eggs'},
 {'id': '69472bfc-8908-43e1-a1d7-06dfec55a07f', 'allergy': 'Allergy to mould'},
 {'id': '54896ed8-b234-4719-967c-3e2cdfb2d7ff',
  'allergy': 'Dander (animal) allergy'},
 {'id': 'cb66b5f7-2434-4ec5-9ceb-18d1c150ac65',
  'allergy': 'Allergy to grass pollen'},
 {'id': '6ef179cf-34a8-4e28-ad76-e690720f9d7f',
  'allergy': 'Allergy to tree pollen'},
 {'id': '57dcd0af-3eb5-4eae-b143-8b9e697bb07c', 'allergy': 'Allergy to fish'}]

In [47]:
dict3 = {item['id']: item for item in result}

In [54]:
dict4 = {} 
for item in result: 
    print(item)
    #new = item.pop('id')
    #dict4[name] = item 

{'allergy': 'Dander (animal) allergy'}
{'id': 'c522f480-a592-46b2-8031-8cf8448e9afd', 'allergy': 'Allergy to eggs'}
{'id': '69472bfc-8908-43e1-a1d7-06dfec55a07f', 'allergy': 'Allergy to mould'}
{'id': '54896ed8-b234-4719-967c-3e2cdfb2d7ff', 'allergy': 'Dander (animal) allergy'}
{'id': 'cb66b5f7-2434-4ec5-9ceb-18d1c150ac65', 'allergy': 'Allergy to grass pollen'}
{'id': '6ef179cf-34a8-4e28-ad76-e690720f9d7f', 'allergy': 'Allergy to tree pollen'}
{'id': '57dcd0af-3eb5-4eae-b143-8b9e697bb07c', 'allergy': 'Allergy to fish'}


In [100]:
dict5 = {}
id = []
alergy = []
for i in result: 
    #print(i)
    for k, v in i.items(): 
        if k == 'id':
            id.append(v)
        else: 
            alergy.append(v)

dict5['id'] =  id 
dict5['allergy'] = alergy 

In [101]:
dict5

{'id': ['c522f480-a592-46b2-8031-8cf8448e9afd',
  '69472bfc-8908-43e1-a1d7-06dfec55a07f',
  '54896ed8-b234-4719-967c-3e2cdfb2d7ff',
  'cb66b5f7-2434-4ec5-9ceb-18d1c150ac65',
  '6ef179cf-34a8-4e28-ad76-e690720f9d7f',
  '57dcd0af-3eb5-4eae-b143-8b9e697bb07c'],
 'allergy': ['Dander (animal) allergy',
  'Allergy to eggs',
  'Allergy to mould',
  'Dander (animal) allergy',
  'Allergy to grass pollen',
  'Allergy to tree pollen',
  'Allergy to fish']}

In [110]:
df = pd.DataFrame(result)
df

Unnamed: 0,allergy,id
0,Dander (animal) allergy,
1,Allergy to eggs,c522f480-a592-46b2-8031-8cf8448e9afd
2,Allergy to mould,69472bfc-8908-43e1-a1d7-06dfec55a07f
3,Dander (animal) allergy,54896ed8-b234-4719-967c-3e2cdfb2d7ff
4,Allergy to grass pollen,cb66b5f7-2434-4ec5-9ceb-18d1c150ac65
5,Allergy to tree pollen,6ef179cf-34a8-4e28-ad76-e690720f9d7f
6,Allergy to fish,57dcd0af-3eb5-4eae-b143-8b9e697bb07c


In [430]:

def get_patient(): 


    pat = s3.Object('1up-coding-challenge-patients', 'Patient.ndjson')
    pat1 = pat.get()['Body'].read()
    pat2 = ndjson.loads(pat1.decode('utf-8'))

    h = input("Enter First Name: ")
    v = input("Enter last name: ")

    fnl, lnl, idl = None, None, None

    for name in pat2: 

        if name['name'][0]['family'] == v and name['name'][0]['given'][0] == h: 

            fnl = name['name'][0]['given'][0]
            lnl = name['name'][0]['family']
            idl = name['id']    
    
    return fnl, lnl, idl 




In [431]:
fn, ln, id = get_patient()

In [439]:
get_patient()(3)

TypeError: 'tuple' object is not callable

In [432]:
print(fn, ln, id)

Cleo27 Bode78 d13874ec-22ea-46ed-a55c-1fd75ef56a58


'Bode78 Cleo27'

In [151]:
if a['name'][0]['family'] == 'Bode78' and a['name'][0]['given'][0] == 'Cleo27': 
    print('YES')

else: 
    print('No')

YES


Need to ndjson library to be able to read ndjson files 
https://pypi.org/project/ndjson/


In [92]:
buckets = list(bucket.objects.all())

In [174]:
content3 = s3.Object('1up-coding-challenge-patients', 'Observation.ndjson')
print(content3)

s3.Object(bucket_name='1up-coding-challenge-patients', key='Observation.ndjson')


In [214]:
content4 = content3.get()['Body'].read()
json_data1 = ndjson.loads(content4.decode('utf-8'))


In [212]:
idl1 = 'Patient/'+idl


In [241]:
count = {'a' : 0}
for item in json_data1: 
    if idl in item['subject']['reference']: 
        count['a'] += 1

In [242]:
count

{'a': 93}

In [323]:
d = {}
ls = []
for obj in bucket.objects.all(): 
    filename = []
    filename.append(obj.key)

    if obj.key != 'Patient.ndjson': 
        hl = obj.key

        content = obj

        content1 = content.get()['Body'].read()
        json_data = ndjson.loads(content1.decode('utf-8'))

        for item in filename: 
            bar = re.sub(".ndjson", '', str(item))
            new = bar
            for item in json_data: 
                if item[0]['resourceType'] == new:
                    if idl in item[0]['target'][0]: 
                        ls.append(new)

                for i in item: 
                    if i == 'subject': 
                        if idl in item['subject']['reference']: 
                            ls.append(new)
                    elif i == 'patient': 
                        if idl in item['patient']['reference']: 
                            ls.append(new)
                    elif i == 'target': 
                        if idl in item['target'][:]: 
                            ls.append(new)




KeyError: 0

In [388]:
a = Counter(ls).keys()
b = Counter(ls).values()
print(a)
print(b)

In [389]:
print(a)
print(b)

dict_keys(['CarePlan', 'CareTeam', 'Claim', 'Condition', 'DiagnosticReport', 'DocumentReference', 'Encounter', 'ExplanationOfBenefit', 'Immunization', 'Observation', 'Procedure', 'Provenance'])
dict_values([2, 2, 8, 8, 12, 8, 8, 8, 11, 93, 5, 1])


In [401]:
ls1 = []
for obj in bucket.objects.all(): 
    filename = []
    filename.append(obj.key)

    if obj.key != 'Patient.ndjson': 
        hl = obj.key

        content = obj

        content1 = content.get()['Body'].read()
        json_data = ndjson.loads(content1.decode('utf-8'))
        ls1.append(json_data)

json_data[0]

{'resourceType': 'SupplyDelivery',
 'id': 'afe96e02-30f7-4623-b3f4-0b723d1c5815',
 'status': 'completed',
 'patient': {'reference': 'Patient/421c3eaf-f95c-47af-b8cd-f6cbcb192fad'},
 'type': {'coding': [{'system': 'http://terminology.hl7.org/CodeSystem/supply-item-type',
    'code': 'device',
    'display': 'Device'}]},
 'suppliedItem': {'quantity': {'value': 2},
  'itemCodeableConcept': {'coding': [{'system': 'http://snomed.info/sct',
     'code': '409534002',
     'display': 'Disposable air-purifying respirator (physical object)'}],
   'text': 'Disposable air-purifying respirator (physical object)'}},
 'occurrenceDateTime': '2020-03-13T20:45:25-04:00'}

In [222]:
import re 

newlist = [] 

for item in filename: 
    bar = re.sub(".ndjson", '', str(item))
    newlist.append(bar)

In [427]:
for item in ls1: 
    if item[0]['resourceType'] == 'Location':
        #print(item[0]['target'][0])
        a = item

In [428]:
a[0]

{'resourceType': 'Location',
 'id': '2b38f05a-19f4-4bb5-b0f7-3a7a7e3a8f28',
 'meta': {'profile': ['http://hl7.org/fhir/us/core/StructureDefinition/us-core-location']},
 'status': 'active',
 'name': 'LAHEY HOSPITAL & MEDICAL CENTER, BURLINGTON',
 'telecom': [{'system': 'phone', 'value': '7817445100'}],
 'address': {'line': ['41 & 45 MALL ROAD'],
  'city': 'BURLINGTON',
  'state': 'MA',
  'postalCode': '01803',
  'country': 'US'},
 'position': {'longitude': -71.201713, 'latitude': 42.503227},
 'managingOrganization': {'reference': 'Organization/6f122869-a856-3d65-8db9-099bf4f5bbb8',
  'display': 'LAHEY HOSPITAL & MEDICAL CENTER, BURLINGTON'}}

In [435]:

def pat_info(): 

    ls = []
    for obj in bucket.objects.all(): 
        filename = []
        filename.append(obj.key)

        if obj.key != 'Patient.ndjson': 
            hl = obj.key

            content = obj

            content1 = content.get()['Body'].read()
            json_data = ndjson.loads(content1.decode('utf-8'))

            for item in filename: 
                bar = re.sub(".ndjson", '', str(item))
                new = bar
                for item in json_data: 
                    if item['resourceType'] == new:
                        if 'patient' in item: 
                            if id in item['patient']['reference']: 
                                ls.append(new)
                        elif 'subject' in item: 
                            if id in item['subject']['reference']:
                                ls.append(new)
                        elif 'target' in item: 
                            for x in item['target']: 
                                if id in x['reference']:
                                    ls.append(new)
                            #if idl in item['target'][0]:
                                #ls1.append(new)
                        #print(item)
                        #if idl in item[0]['target'][0]: 
                            #ls.append(new)
    a = Counter(ls).keys()
    b = Counter(ls).values()

    final = pd.DataFrame(list(zip(a,b)), columns = ['RESOURCE_TYPE', 'COUNT'])
    return final.sort_values(['COUNT'], ascending=False)
    

In [436]:
pat_info()

Unnamed: 0,RESOURCE_TYPE,COUNT
9,Observation,93
4,DiagnosticReport,12
8,Immunization,11
2,Claim,8
3,Condition,8
5,DocumentReference,8
6,Encounter,8
7,ExplanationOfBenefit,8
10,Procedure,5
0,CarePlan,2


In [434]:
a = Counter(ls).keys()
b = Counter(ls).values()
print(a)
print(b)

dict_keys(['CarePlan', 'CareTeam', 'Claim', 'Condition', 'DiagnosticReport', 'DocumentReference', 'Encounter', 'ExplanationOfBenefit', 'Immunization', 'Observation', 'Procedure', 'Provenance'])
dict_values([2, 2, 8, 8, 12, 8, 8, 8, 11, 93, 5, 1])


In [422]:
list(zip(a, b))

[('CarePlan', 2),
 ('CareTeam', 2),
 ('Claim', 8),
 ('Condition', 8),
 ('DiagnosticReport', 12),
 ('DocumentReference', 8),
 ('Encounter', 8),
 ('ExplanationOfBenefit', 8),
 ('Immunization', 11),
 ('Observation', 93),
 ('Procedure', 5),
 ('Provenance', 1)]

In [426]:
final = pd.DataFrame(list(zip(a,b)), columns = ['RESOURCE_TYPE', 'COUNT'])
final.sort_values(['COUNT'], ascending=False)

Unnamed: 0,RESOURCE_TYPE,COUNT
9,Observation,93
4,DiagnosticReport,12
8,Immunization,11
2,Claim,8
3,Condition,8
5,DocumentReference,8
6,Encounter,8
7,ExplanationOfBenefit,8
10,Procedure,5
0,CarePlan,2


In [441]:
id

'd13874ec-22ea-46ed-a55c-1fd75ef56a58'

In [444]:
 
patient_id = 'd13874ec-22ea-46ed-a55c-1fd75ef56a58'

def get_patient(id): 




    pat = s3.Object('1up-coding-challenge-patients', 'Patient.ndjson')
    pat1 = pat.get()['Body'].read()
    pat2 = ndjson.loads(pat1.decode('utf-8'))


    fnl, lnl, idl = None, None, None

    for name in pat2: 

        if name['id']==patient_id: 

            fnl = name['name'][0]['given'][0]
            lnl = name['name'][0]['family']
            idl = name['id']    
    
    return fnl, lnl, idl 

In [445]:
fn, ln, id = get_patient(patient_id)

In [446]:
fn

'Cleo27'