# CHF Hospitalization

In [4]:
import glob
import pandas as pd


### Get all patient data

In [5]:
# Parse every file under ../data/fhir with pandas; one parsed object per file.
raw_data = [pd.read_json(bundle_path) for bundle_path in glob.glob('../data/fhir/*')]

### Collect Features

In [6]:
# Calendar year treated as "now" when deriving a patient's age from the birth year.
REFERENCE_YEAR = 2020
# Age (in years) above which the 'Age Over 50' flag is set.
AGE_THRESHOLD = 50
# Encounter class codes that this analysis treats as a hospitalization.
HOSPITALIZATION_CLASS_CODES = ("IMP", "EMER")
# Encounter reason that marks a hospitalization as CHF-related.
CHF_REASON_DISPLAY = "Chronic congestive heart failure (disorder)"
# Observation text of the reported pain score.
PAIN_SCORE_DISPLAY = "Pain severity - 0-10 verbal numeric rating [Score] - Reported"


def _record_vital_sign(features, resource):
    """Store one vital-signs Observation resource in ``features`` (mutated in place).

    Body Mass Index and blood pressure readings are keyed by the observation's
    ``effectiveDateTime``. Any other observation text is ignored.
    """
    display = resource['code']['text']
    if display == "Body Mass Index":
        value = resource['valueQuantity']['value']
        features['Body Mass Index'][resource['effectiveDateTime']] = value
    elif display == "Blood Pressure":
        # NOTE(review): assumes component[0] is diastolic and component[1] is
        # systolic, as the original code did -- confirm against the data.
        diastolic_val = resource['component'][0]['valueQuantity']['value']
        systolic_val = resource['component'][1]['valueQuantity']['value']
        date = resource['effectiveDateTime']
        features['Diastolic'][date] = diastolic_val
        features['Systolic'][date] = systolic_val
    elif display == PAIN_SCORE_DISPLAY:
        # Unlike the other vitals, 'Pain' is overwritten with a single scalar,
        # so the last pain observation visited wins (it stays {} when there
        # is none). Kept as-is so the shape of training_data does not change.
        features['Pain'] = resource['valueQuantity']['value']


def _chf_discharge_end(resource):
    """Return ``period.end`` of a CHF hospitalization Encounter, else ``None``.

    ``None`` is returned when the encounter class is not a hospitalization
    class, when the first reason is not CHF, or when the reason / period
    fields are absent.
    """
    if resource['class']['code'] not in HOSPITALIZATION_CLASS_CODES:
        return None
    try:
        reason = resource['reasonCode'][0]['coding'][0]['display']
        if reason != CHF_REASON_DISPLAY:
            return None
        return resource['period']['end']
    except (KeyError, IndexError, TypeError):
        # Encounters without a reason or an end date are skipped on purpose;
        # only lookup failures are swallowed, not every exception.
        return None


def collect_patient_features(patient):
    """Build the feature dict for one patient bundle.

    Parameters
    ----------
    patient : mapping-like object where ``patient['entry']`` is an iterable of
        entries (each with a ``'resource'`` dict) that also supports ``[0]``.
        The first entry's resource supplies ``id`` and ``birthDate``.

    Returns
    -------
    tuple
        ``(patient_id, features)``.

    Collection stops at the second CHF hospitalization: entries after it are
    not visited.
    """
    demographics = patient['entry'][0]['resource']
    patient_id = demographics['id']
    age = REFERENCE_YEAR - int(demographics['birthDate'][0:4])

    features = {
        'Age': age,
        'Age Over 50': 1 if age > AGE_THRESHOLD else 0,
        'Body Mass Index': {},
        'Diastolic': {},
        'Systolic': {},
        'Pain': {},
        'Prediabetes': 0,  # counter
        'CHF Re-Hosp': 0,  # boolean - re-hospitalized for chf (EMER or IMP)
        'CHF First Discharge': None,  # date of first discharge for chf
        'CHF Second Discharge': None,  # date of re-hospitalization for chf
    }

    for entry in patient['entry']:
        resource = entry['resource']
        resource_type = resource['resourceType']

        if resource_type == "Observation":
            if resource['category'][0]['coding'][0]['code'] == "vital-signs":
                _record_vital_sign(features, resource)

        elif resource_type == "Encounter":
            discharge_end = _chf_discharge_end(resource)
            if discharge_end is None:
                continue
            if features['CHF First Discharge'] is None:
                features['CHF First Discharge'] = discharge_end
            else:
                # Second CHF hospitalization: only now is the patient
                # *re*-hospitalized, so the flag is set here rather than on
                # the first hospitalization.
                features['CHF Second Discharge'] = discharge_end
                features['CHF Re-Hosp'] = 1
                break  # stop collecting data for patient if re-hospitalized for chf

        elif resource_type == "Condition":
            if resource['code']['coding'][0]['display'] == 'Prediabetes':
                features['Prediabetes'] += 1

    return patient_id, features


# Populate training_data: patient id -> feature dict
training_data = {}
for patient in raw_data:
    patient_id, patient_features = collect_patient_features(patient)
    training_data[patient_id] = patient_features
                
# Print the collected feature dict of every patient, one per line
for patient_features in training_data.values():
    print(patient_features)

{'Age': 79, 'Age Over 50': 1, 'Body Mass Index': {'2006-05-21T22:59:21-07:00': 27.31, '2007-05-27T22:59:21-07:00': 27.31, '2008-06-01T22:59:21-07:00': 27.31, '2009-06-07T22:59:21-07:00': 27.31, '2010-06-13T22:59:21-07:00': 27.31, '2011-06-12T22:59:21-07:00': 27.31, '2011-06-19T22:59:21-07:00': 27.31, '2012-06-24T22:59:21-07:00': 27.31}, 'Diastolic': {'2006-05-21T22:59:21-07:00': 79, '2007-05-27T22:59:21-07:00': 80.0, '2008-06-01T22:59:21-07:00': 79, '2009-06-07T22:59:21-07:00': 76, '2010-06-13T22:59:21-07:00': 78, '2011-06-12T22:59:21-07:00': 77, '2011-06-19T22:59:21-07:00': 83, '2012-06-24T22:59:21-07:00': 80.0}, 'Systolic': {'2006-05-21T22:59:21-07:00': 117, '2007-05-27T22:59:21-07:00': 124, '2008-06-01T22:59:21-07:00': 120.0, '2009-06-07T22:59:21-07:00': 124, '2010-06-13T22:59:21-07:00': 129, '2011-06-12T22:59:21-07:00': 128, '2011-06-19T22:59:21-07:00': 111, '2012-06-24T22:59:21-07:00': 132}, 'Pain': 2, 'Prediabetes': 1, 'CHF Re-Hosp': 1, 'CHF First Discharge': '2013-01-24T23:29:21