In [1]:
# Enable the autoreload extension in mode 2 so that edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import logging

import pandas as pd

from fhir_client import FHIRClient

# Root logging is configured at INFO level before the remaining project import,
# keeping the same ordering of side effects as before.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

import preprocessing

# The client receives the preprocessing module and the notebook logger.
client = FHIRClient(
    service_base_url='https://r3.smarthealthit.org',
    preprocessor=preprocessing,
    logger=logger,
)



In [3]:
procedures = client.get_all_procedures()

# Build a table from the first coding entry of each procedure, drop duplicate
# rows, sort by the 'display' column and show the first rows.
(
    pd.DataFrame([procedure.code['coding'][0] for procedure in procedures])
    .drop_duplicates()
    .sort_values(by=['display'])
    .head()
)

KeyboardInterrupt: 

In [None]:
conditions = client.get_all_conditions()

# Build a table from the first coding entry of each condition, drop duplicate
# rows, sort by the 'display' column and show the first rows.
(
    pd.DataFrame([condition.code['coding'][0] for condition in conditions])
    .drop_duplicates()
    .sort_values(by=['display'])
    .head()
)

In [3]:
# Get patients by condition
# NOTE(review): the following cell assigns the same variable for "Abdominal pain",
# so this result is only available until that cell runs.
patients_by_condition_text = client.get_patients_by_condition_text(
    "Acute viral pharyngitis"
)

In [4]:
# Query patients by the condition text "Abdominal pain"; the cells below work
# with this result.
patients_by_condition_text = client.get_patients_by_condition_text(
    "Abdominal pain"
)

INFO:root:Received 154 observations in 0.74 seconds.
INFO:root:Received 59 observations in 0.32 seconds.
INFO:root:Received 162 observations in 0.67 seconds.
INFO:root:Received 206 observations in 0.81 seconds.
INFO:root:Received 4 patients in 3.04 seconds.


In [5]:
# Number of entries in the result of the condition-text query.
len(patients_by_condition_text)

4

## Machine Learning

In [8]:
from ml_on_fhir import MLOnFHIRClassifier, MLOnFHIRCluster
from fhir_objects.patient import Patient
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, roc_curve, auc

# Fit a decision tree on the queried patients, with `birthDate` as the feature
# attribute and `gender` as the label attribute.
ml_fhir = MLOnFHIRClassifier(Patient, feature_attrs=['birthDate'], label_attrs=['gender'])
X, y, trained_clf = ml_fhir.fit(patients_by_condition_text, DecisionTreeClassifier())

# ROC curve computed from the classifier's hard predictions on the same X that
# fit() returned. The printed value is the area under that curve (AUROC), not
# an accuracy, so the message names it accordingly.
fpr, tpr, _ = roc_curve(y, trained_clf.predict(X))
print("Prediction AUROC {}".format(auc(fpr, tpr)))

INFO:root:Extracting attributes from data set
INFO:root:Preprocessing data
INFO:root:Started training of clf
INFO:root:Training completed
INFO:root:Accuracy : 1.0, F1-score : 1.0


Prediction accuracy 1.0


In [14]:
# Evaluate classifier:
# NOTE(review): the recorded output for this cell reports a support of 281 + 291
# samples, while the condition query above logged 4 patients — presumably it was
# produced with different kernel state; re-run from a fresh kernel to confirm.
ml_fhir.evaluate(X, y)

{'AUPRC': 0.7573813643401273,
 'AUROC': 0.6717785033813943,
 'accuracy': 0.6713286713286714,
 'f1_score': 0.6713286713286714,
 'f1_score_class': array([0.67586207, 0.66666667]),
 'fn': 103,
 'fp': 85,
 'precision': array([0.65551839, 0.68864469]),
 'recall': array([0.6975089 , 0.64604811]),
 'support': array([281, 291]),
 'tn': 196,
 'tp': 188}

In [12]:
# Try clustering
from sklearn.cluster import KMeans

# KMeans chooses its initial centroids at random; a fixed seed keeps the cluster
# assignment (and the reported silhouette score) reproducible between runs.
KMEANS_SEED = 0

# NOTE: this rebinds ml_fhir, X and y, which the classifier cell also assigns.
ml_fhir = MLOnFHIRCluster(Patient, feature_attrs=['birthDate'], label_attrs=['gender'])
X, y, trained_cluster = ml_fhir.fit(
    patients_by_condition_text,
    KMeans(n_clusters=2, random_state=KMEANS_SEED),
)


INFO:root:Extracting attributes from data set
INFO:root:Preprocessing data
INFO:root:Started clustering
INFO:root:Clustering completed
INFO:root:Silhouette score : 0.6118831953071083


In [72]:
# Score the fitted KMeans model on X. scikit-learn defines KMeans.score as the
# opposite of the K-means objective, so the value is negative.
# The classifier (`trained_clf`) cannot be scored this way: its score() also
# requires the true labels.
trained_cluster.score(X)

-5492.113919812582