In [None]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import yaml
import re

import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from functools import reduce

from fiber.cohort import Cohort
from fiber.condition import Procedure, Diagnosis, Drug, VitalSign, Patient, LabValue
from fiber.database.hana import engine, Session, print_sqla

from fiber.database.table import fact

# Cohort Definition

In [None]:
from fiber.condition import Diagnosis, Drug, LabValue, Patient, Procedure, VitalSign

In [None]:
# Age filter: patient age in days must exceed 18 years' worth of days.
min_age = Patient.age_in_days > 18 * 365

# A patient qualifies with a procedure whose code matches '35.%' or '36.1%',
# each restricted by the age filter.
# NOTE(review): '%' is presumably a SQL LIKE wildcard and the codes presumably
# ICD-9 procedure codes — no context is passed here; confirm.
heart_surgery_condition = (
    Procedure(code='35.%').with_(min_age)
    | Procedure(code='36.1%').with_(min_age)
)

In [None]:
# Build the cohort object from the heart-surgery condition; every later cell
# queries this cohort.
heart_surgery_cohort = Cohort(heart_surgery_condition)

In [None]:
# Display the cohort size (presumably the number of matching patients — confirm
# against Cohort.__len__).
len(heart_surgery_cohort)

# Demographics

In [None]:
# Fetch the cohort's demographics summary; the following cells index it by
# "age" and "gender" and read each entry's "figure".
demographics = heart_surgery_cohort.demographics

In [None]:
# Display the figure stored under the "age" entry of the demographics summary.
demographics["age"]["figure"]

In [None]:
# Display the figure stored under the "gender" entry of the demographics summary.
demographics["gender"]["figure"]

# Disease Onsets

In [None]:
# Onset of the diagnosis with code 584.9 (ICD-9 context), labelled "aki", at the
# time deltas 1, 7, 14 and 28 (presumably days — a later cell reads a column
# named `aki_28_days`).
aki = heart_surgery_cohort.has_onset(
    condition=Diagnosis(context="ICD-9", code="584.9"),
    time_deltas=[1, 7, 14, 28],
    name="aki",
)
# Preview the first rows of the onset frame.
aki.head()

In [None]:
# Onset of any diagnosis matching 433.%, 434.% or 436.%, labelled "stroke", at
# the time deltas 1, 7, 14 and 28 (presumably days — a later cell reads a column
# named `stroke_28_days`).
# NOTE(review): unlike the AKI onset, no `context` is passed to Diagnosis here —
# confirm that matching across all coding contexts is intended.
stroke = heart_surgery_cohort.has_onset(
    condition=(
        Diagnosis(code='433.%')
        | Diagnosis(code='434.%')
        | Diagnosis(code='436.%')
    ),
    time_deltas=[1, 7, 14, 28],
    name="stroke",
)
# Preview the first rows of the onset frame.
stroke.head()

# Preconditions

In [None]:
# Precondition lookup for the "renal failure" definition taken from the
# condition store.
# NOTE(review): the condition is passed positionally here but as `condition=`
# in the cardiac arrhythmias cell — confirm both bind to the same parameter.
renal_failure = heart_surgery_cohort.has_precondition(
    Diagnosis.from_condition_store(name="renal failure")
)
# Preview the first rows of the result.
renal_failure.head()

In [None]:
# Precondition lookup for the "cardiac arrhythmias" definition taken from the
# condition store; the result is consumed by build_data below.
cardiac_arrhythmias = heart_surgery_cohort.has_precondition(
    condition=Diagnosis.from_condition_store(name="cardiac arrhythmias")
)

# ML-Ready DataFrames

In [None]:
# Combine the two precondition frames and the two onset frames into a single
# DataFrame. Later cells read `medical_record_number`, `aki_28_days` and
# `stroke_28_days` from it.
df = heart_surgery_cohort.build_data(
    cardiac_arrhythmias,
    renal_failure,
    stroke,
    aki,
)

In [None]:
# NOTE(review): this looks like a leftover scratch cell — the combined condition
# is neither assigned nor used afterwards, and Drug('') is given an empty
# string. Consider removing it, or replace it with the intended condition.
VitalSign('Systolic Blood Pressure') & Drug('')

In [None]:
# Preview the first ten rows of the combined DataFrame.
df.head(10)

# Lab Values

In [None]:
# Lab-value conditions selected by test name.
# NOTE(review): '%' in the troponin name is presumably a SQL LIKE wildcard
# (any test whose name contains TROPONIN ... I) — confirm.
lv_crea = LabValue(name="CREATININE-SERUM")
lv_troponin = LabValue(name="TROPONIN%I%")

In [None]:
# Fetch the lab results for each condition, passing the heart-surgery condition
# as `before` (presumably restricting to measurements taken before the
# procedure — confirm). Later cells read the columns `medical_record_number`,
# `numeric_value` and `occurs_x_days_before` from these frames.
lv_creatinine_results = heart_surgery_cohort.results_for(lv_crea, before=heart_surgery_condition)
lv_troponin_results = heart_surgery_cohort.results_for(lv_troponin, before=heart_surgery_condition)

In [None]:
# Add boolean outcome columns to both lab-result frames: a row is flagged when
# its medical_record_number appears among the rows of `df` whose 28-day onset
# column is set.
outcome_columns = (
    ("develops_aki", "aki_28_days"),
    ("develops_stroke", "stroke_28_days"),
)
for lab_results in (lv_creatinine_results, lv_troponin_results):
    for flag_column, onset_column in outcome_columns:
        onset_rows = df[df[onset_column]]
        lab_results[flag_column] = lab_results["medical_record_number"].isin(
            onset_rows["medical_record_number"]
        )

In [None]:
# Keep only creatinine rows with numeric_value < 10 and occurs_x_days_before < 10
# (presumably: plausible readings taken fewer than 10 days before the
# procedure — units are not visible here; confirm).
# NOTE(review): the troponin results are plotted unfiltered — confirm intended.
creatinine_in_range = (
    (lv_creatinine_results.numeric_value < 10)
    & (lv_creatinine_results.occurs_x_days_before < 10)
)
lv_creatinine_results = lv_creatinine_results[creatinine_in_range]

# 2x2 grid: rows = lab test (creatinine, troponin), columns = outcome (AKI, stroke).
figure, axes = plt.subplots(2, 2, figsize=(7, 7), sharey=False)

lab_panels = (
    ("Creatinine", lv_creatinine_results),
    ("Troponin", lv_troponin_results),
)
outcome_panels = (
    ("AKI", "develops_aki"),
    ("stroke", "develops_stroke"),
)

for row_idx, (lab_name, lab_results) in enumerate(lab_panels):
    for col_idx, (outcome_name, outcome_flag) in enumerate(outcome_panels):
        ax = axes[row_idx, col_idx]
        # Boolean column produced by `.isin(...)`, so `~` is a safe negation.
        developed = lab_results[outcome_flag]
        values = lab_results["numeric_value"]
        # `sns.distplot(..., hist=False)` only drew a KDE curve and is deprecated;
        # `sns.kdeplot` draws the same curve. NaNs are dropped explicitly, as
        # distplot did internally.
        sns.kdeplot(
            values[developed].dropna(),
            color="red",
            ax=ax,
            label="develops {}".format(outcome_name),
        )
        sns.kdeplot(
            values[~developed].dropna(),
            color="skyblue",
            ax=ax,
            label="no {}".format(outcome_name),
        )
        # Each panel must be readable on its own.
        ax.set_title("{} by {}".format(lab_name, outcome_name))
        ax.set_xlabel("numeric_value")
        ax.legend()

figure.tight_layout()


In [None]:
# Display the 2x2 density figure created in the previous cell.
figure

In [None]:
# KDE of troponin values with one curve per `develops_stroke` value.
#
# `bins` holds numpy's default histogram edges for the troponin values. It has
# no effect on a KDE-only plot and is kept only in case later cells rely on it.
# NaNs are dropped first because the histogram range cannot be derived from
# non-finite data, and `_` is no longer assigned so IPython's last-output
# variable is not clobbered.
troponin_values = lv_troponin_results.numeric_value.dropna()
bins = np.histogram_bin_edges(troponin_values)

g = sns.FacetGrid(lv_troponin_results, hue="develops_stroke", height=4, aspect=1)
# `sns.distplot(..., hist=False, kde=True)` is deprecated; `sns.kdeplot` draws
# the same density curve.
g = g.map(sns.kdeplot, "numeric_value")
# Without a legend the hue colours cannot be told apart.
g.add_legend();