In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from fhir_client import FHIRClient
import logging
import pandas as pd
# Configure the root logger at INFO level; the "INFO:root:..." lines in later
# cell outputs are emitted at this level.
logging.basicConfig(level=logging.INFO)

# Client for the FHIR server; the base URL points at a local DSTU3 endpoint.
client = FHIRClient(service_base_url='http://localhost:8080/baseDstu3')

### Querying Patients
There are two general ways of searching for patients with specific properties.

The first one is to search by coding system:

In [7]:
# Fetch the procedures through the client, then show the distinct codings
# (first coding entry of each procedure), ordered by display name.
procedures = client.get_all_procedures()
(
    pd.DataFrame([procedure.code['coding'][0] for procedure in procedures])
    .drop_duplicates()
    .sort_values(by=['display'])
    .head()
)

Unnamed: 0,code,display,system
397,183450002,Admission to burn unit,http://snomed.info/sct
398,305340004,Admission to long stay hospital,http://snomed.info/sct
18,305428000,Admission to orthopedic department,http://snomed.info/sct
6113,305433001,Admission to trauma surgery department,http://snomed.info/sct
116,395142003,Allergy screening test,http://snomed.info/sct


In [3]:
# Search by coding: the first argument is the coding system URI and the second
# the code, matching the `system` and `code` columns of the table above.
patients_by_procedure_code = client.get_patients_by_procedure_code("http://snomed.info/sct","73761001")

The second one is to search by text. The search text is matched against `CodeableConcept.text`, `Coding.display`, or `Identifier.type.text`:

In [8]:
# Fetch the conditions through the client, then show the distinct codings
# (first coding entry of each condition), ordered by display name.
conditions = client.get_all_conditions()
(
    pd.DataFrame([condition.code['coding'][0] for condition in conditions])
    .drop_duplicates()
    .sort_values(by=['display'])
    .head()
)

Unnamed: 0,code,display,system
520,241929008,Acute allergic reaction,http://snomed.info/sct
40,75498004,Acute bacterial sinusitis (disorder),http://snomed.info/sct
21,10509002,Acute bronchitis (disorder),http://snomed.info/sct
4,195662009,Acute viral pharyngitis (disorder),http://snomed.info/sct
202,26929004,Alzheimer's disease (disorder),http://snomed.info/sct


In [4]:
# Search by text: the search string appears in the `display` text of one of
# the conditions listed above ("Acute viral pharyngitis (disorder)").
patients_by_condition_text = client.get_patients_by_condition_text("Acute viral pharyngitis")

## Machine Learning

In [5]:
from ml_on_fhir import MLOnFHIR
from fhir_objects.patient import Patient
from sklearn.tree import DecisionTreeClassifier

# Use each patient's `birthDate` as the only feature and `gender` as the label,
# then fit a decision tree on the patients found by the procedure-code search.
# `fit` hands back the feature matrix, the labels and the fitted classifier.
ml_fhir = MLOnFHIR(Patient, feature_attrs=['birthDate'], label_attrs=['gender'])
X, y, trained_clf = ml_fhir.fit(patients_by_procedure_code, DecisionTreeClassifier())

from sklearn.metrics import accuracy_score, roc_curve, auc
# NOTE: X and y are the arrays returned by `ml_fhir.fit`, i.e. the score below
# is measured on the data the classifier was fitted on, not on held-out data.
# `predict` yields hard class labels, so the ROC curve is built from a single
# operating point rather than from continuous scores.
fpr, tpr, _ = roc_curve(y, trained_clf.predict(X))
# The reported number is the area under the ROC curve, not classification
# accuracy, so it is labelled accordingly.
print("ROC AUC (training data) {}".format(auc(fpr, tpr)))

INFO:root:Extracting attributes from data set
INFO:root:Preprocessing data
INFO:root:Started training of clf
INFO:root:Training completed


Prediction accuracy 0.6587121212121212


## Custom Preprocessing Classes

#### The first five values of the `birthDate` feature, which has been preprocessed into an age in years:

In [6]:
X[:5]

array([[57],
       [56],
       [72],
       [61],
       [77]])

If you want to preprocess FHIR resources differently, feel free to implement your own preprocessing class.
We can use the `register_preprocessor` function to do so. It is crucial to stick to the following naming scheme: 
    
    "<FHIR_Object_Name><FHIR_Resource_Name>Processor"
    
Furthermore, at least the methods `fit` and `transform` have to be implemented.

In [7]:
from preprocessing import register_preprocessor

from sklearn.base import BaseEstimator
import datetime as dt
import numpy as np
from fhir_objects.fhir_resources import date_format

class PatientBirthdateProcessor(BaseEstimator):
    """
    Preprocessor that turns a patient's birth date into an age in days,
    so that the birth date can be used as a numeric feature.
    """
    def transform(self, X, **transform_params):
        """
        Convert birth date strings into ages in days.

        Args:
            X: Iterable of rows; the first element of each row is a birth
               date string that is parsed with `date_format`.
            **transform_params: Unused.

        Returns:
            Integer numpy array of shape (n_rows, 1) holding the number of
            days between each birth date and today's date.
        """
        # Read the clock once so every row is measured against the same day,
        # even if the loop happens to run across midnight.
        today = dt.datetime.now().date()
        ages = [
            [(today - dt.datetime.strptime(birthdate[0], date_format).date()).days]
            for birthdate in X
        ]
        # reshape keeps the result 2-D (0 rows, 1 column) when X is empty.
        return np.array(ages, dtype=int).reshape(-1, 1)

    def fit(self, X, y=None, **fit_params):
        """Nothing is learned from the data; returns the processor itself."""
        return self

register_preprocessor(PatientBirthdateProcessor)



In [8]:
# Same setup and fit as before, repeated after `register_preprocessor` was
# called with the custom processor; the `X[:5]` output further down shows the
# resulting `birthDate` feature values.
ml_fhir = MLOnFHIR(Patient, feature_attrs=['birthDate'], label_attrs=['gender'])
X, y, trained_clf = ml_fhir.fit(patients_by_procedure_code, DecisionTreeClassifier())

INFO:root:Extracting attributes from data set
INFO:root:Preprocessing data
INFO:root:Started training of clf
INFO:root:Training completed


#### The `birthDate` feature is now the age in days:

In [9]:
X[:5]

array([[21140],
       [20769],
       [26591],
       [22512],
       [28271]])