In [None]:
# Execute the shared defaults notebook in this kernel's namespace.
# NOTE(review): `pd` and `DataLoader` are used further down without a visible
# import, so they are presumably defined by this notebook — TODO confirm.
%run ../00_default_options.ipynb

In [None]:
import pickle

import matplotlib.pyplot as plt

from robotehr.api.predictor import get_predictor_details
from robotehr.utils import FriendlyNamesConverter

# VIOLINS

In [None]:
from robotehr.evaluation.risk_groups import make_risk_groups, plot_risk_groups
from robotehr.models.predictor import Predictor

In [None]:
# Load the stored predictors for the three outcomes plotted below.
# The ids are paired positionally with the names on the left:
# UTI -> 20, OVI -> 19, CMV -> 17.
predictor_uti, predictor_ovi, predictor_cmv = (
    Predictor.load(id=predictor_id) for predictor_id in (20, 19, 17)
)

In [None]:
# must be defined for legacy reasons
# Column-name fragments of the feature families to keep: boolean "any" flags
# and min/max aggregates.
regexes = [
    'any__Diagnosis__',
    'any__Procedure__',
    'any__Drug__',
    'any__Material__',
    'any__Encounter__',
    'any__AlcoholUse__',
    'any__DrugUse__',
    'any__TobaccoUse__',
    'min__height__',
    'min__weight__',
    'min__vitalsign__',
    'min__measurement__',
    'min__labvalue__',
    'max__height__',
    'max__weight__',
    'max__vitalsign__',
    'max__measurement__',
    'max__labvalue__'
]

# Combine the fragments into one "a|b|c" pattern (no trailing separator).
# NOTE(review): presumably interpreted as a regex alternation by DataLoader —
# confirm against its implementation.
column_selector = "|".join(regexes)

class CustomDataLoader(DataLoader):
    """Data loader that cleans demographics and prepares features for the predictors.

    ``transform`` drops unused demographic columns, collapses raw race labels
    into a small set of groups and one-hot encodes ``gender`` and ``race``.
    ``transform_training_data`` fits a scaler and an imputer on the training
    split; ``transform_test_data`` re-applies those fitted objects to the
    test split.

    NOTE(review): ``self.objects`` is assumed to be a dict-like store provided
    by the ``DataLoader`` base class — confirm.
    """

    def transform(self, X, y):
        """Clean and encode the raw feature frame.

        Parameters
        ----------
        X : pandas.DataFrame
            Raw features. Must contain the demographic columns removed below
            as well as ``gender`` and ``race``.
        y : object
            Targets; returned unchanged.

        Returns
        -------
        tuple
            ``(X_transformed, y)``. The input frame itself is not modified.

        Raises
        ------
        KeyError
            If one of the columns to remove is missing from ``X``.
        """
        # Imports and helper definitions are kept local to the method, as in
        # the original code.
        from sklearn.preprocessing import OneHotEncoder
        from enum import Enum

        # remove unused features (on a copy, so the caller's frame is not
        # left in a half-transformed state)
        X = X.drop(columns=[
            'medical_record_number',
            'mother_account_number',
            'date_of_birth',
            'month_of_birth',
            'patient_ethnic_group',
            'religion',
            'address_zip',
            'deceased_indicator',
            'marital_status_code',
        ])

        class RaceType(str, Enum):
            AFRICAN = 'African'
            AMERICAN_BLACK = 'Black or African-American'
            AMERICAN_NATIVE = 'Native American'
            ASIAN = 'Asian'
            ASIAN_PACIFIC = 'Asian Pacific'
            ASIAN_INDIAN = 'Asian Indian'
            ASIAN_CHINESE = 'Asian Chinese'
            HISPANIC = 'Hispanic or Latino'
            OTHER = 'Other'
            WHITE = 'White'

        # Group -> raw labels as they appear in the ``race`` column.
        RACE_MAPPING = {
            RaceType.AFRICAN: [
                'Cape Verdian',
                'Congolese',
                'Eritrean',
                'Ethiopian',
                'Gabonian',
                'Ghanaian',
                'Guinean',
                'Ivory Coastian',
                'Kenyan',
                'Liberian',
                'Madagascar',
                'Malian',
                'Nigerian',
                'Other: East African',
                'Other: North African',
                'Other: South African',
                'Other: West African',
                'Senegalese',
                'Sierra Leonean',
                'Somalian',
                'Sudanese',
                'Tanzanian',
                'Togolese',
                'Ugandan',
                'Zimbabwean'
            ],
            RaceType.AMERICAN_BLACK: [
                'African American (Black)',
                'African-American',
                'Black Or African-American',
                'Black or African - American',
            ],
            RaceType.AMERICAN_NATIVE: [
                'American (Indian/Alaskan)',
                'Native American'
            ],
            RaceType.ASIAN: [
                'Asian',
                'Bangladeshi',
                'Bhutanese',
                'Burmese',
                'Cambodian',
                'Hmong',
                'Indonesian',
                'Japanese',
                'Korean',
                'Laotian',
                'Malaysian',
                'Maldivian',
                'Nepalese',
                'Okinawan',
                'Pakistani',
                'Singaporean',
                'Taiwanese',
                'Thai',
                'Vietnamese',
                'Yapese'
            ],
            RaceType.ASIAN_PACIFIC: [
                'Asian (Pacific Islander)',
                'Carolinian',
                'Chamorro',
                'Chuukese',
                'Fijian',
                'Filipino',
                'Guamanian',
                'Guamanian Or Chamorro',
                'Guamanian or Chamorro',
                'Iwo Jiman',
                'Kiribati',
                'Kosraean',
                'Mariana Islander',
                'Marshallese',
                'Melanesian',
                'Micronesian',
                'Native Hawaiian',
                'New Hebrides',
                'Other Pacific Islander',
                'Pacific Islander',
                'Palauan',
                'Pohnpeian',
                'Polynesian',
                'Saipanese',
                'Samoan',
                'Papua New Guinean',
                'Tahitian',
                'Tokelauan',
                'Tongan'
            ],
            RaceType.ASIAN_INDIAN: [
                'Asian Indian',
                'Sri Lankan',
                'Sri lankan',
                'West Indian'
            ],
            RaceType.ASIAN_CHINESE: [
                'Chinese',
            ],
            RaceType.HISPANIC: [
                'Barbadian',
                'Dominica Islander',
                'Grenadian',
                'Haitian',
                'Hispanic/Latino',
                'Jamaican',
                'St Vincentian',
                'Trinidadian'
            ],
            RaceType.OTHER: [
                '',
                'Aa',
                'Ab',
                'Af',
                'Ag',
                'Ak',
                'Al',
                'Ap',
                'Ar',
                'Av',
                'Ay',
                'B',
                'B1',
                'B2',
                'B3',
                'B4',
                'B5',
                'B6',
                'B7',
                'B8',
                'B9',
                'Ba',
                'Bb',
                'Bc',
                'Bd',
                'Be',
                'Bf',
                'Bg',
                'Bh',
                'Bj',
                'Bk',
                'Bm',
                'Bn',
                'Bo',
                'Bp',
                'Bq',
                'Br',
                'Bs',
                'Bt',
                'Bu',
                'Bv',
                'Bw',
                'Bx',
                'By',
                'Bz',
                'I',
                'MSDW_NOT APPLICABLE',
                'MSDW_OTHER',
                'MSDW_UNKNOWN',
                'NOT AVAILABLE',
                'Non Hispanic',
                'O',
                'Other',
                'Pk',
                'Pl',
                'Pm',
                'Po',
                'Ps',
                'Pv',
                'U',
                'Unk',
                'Unknown',
                'W'
            ],
            RaceType.WHITE: [
                'Caucasian (White)',
                'White'
            ]
        }

        # value mapping: invert RACE_MAPPING into raw label -> group; labels
        # missing from the mapping become NaN.
        label_to_group = {
            label: cat for cat, labels in RACE_MAPPING.items()
            for label in labels
        }
        X['race'] = (
            X.race.map(label_to_group)
            .astype(pd.api.types.CategoricalDtype(RaceType))
        )

        # one-hot encode the categorical demographics
        for column in ['gender', 'race']:
            try:
                # Newer scikit-learn renamed ``sparse`` to ``sparse_output``.
                enc = OneHotEncoder(sparse_output=False)
            except TypeError:
                # Older scikit-learn only knows the ``sparse`` keyword.
                enc = OneHotEncoder(sparse=False)
            transformed_data = enc.fit_transform(X[[column]])
            transformed_columns = pd.DataFrame(
                data=transformed_data,
                # Use the enum's value explicitly so the column name does not
                # depend on how the Python version formats str-mixin enums.
                columns=[
                    f'{column}_{getattr(c, "value", c)}'
                    for c in enc.categories_[0]
                ],
                # Reuse X's index: ``join`` aligns on index labels, so a
                # default RangeIndex would misalign rows for any other index.
                index=X.index,
            )
            X = X.join(transformed_columns)
            del X[column]

        # Missing values in "any" columns are filled with False.
        any_columns = X.columns[X.columns.str.contains('any')]
        X[any_columns] = X[any_columns].fillna(False)
        return X, y

    def transform_training_data(self, X_train, y_train):
        """Fit scaler and imputer on the training split and apply them.

        The fitted objects are stored in ``self.objects`` under ``'scaler'``
        and ``'imputer'`` so ``transform_test_data`` can reuse them.

        Returns
        -------
        tuple
            ``(X_train, y_train)`` where ``X_train`` is a DataFrame with the
            original columns and index; ``y_train`` is returned unchanged.
        """
        from sklearn.preprocessing import StandardScaler
        scaler = StandardScaler()
        self.objects['scaler'] = scaler
        X_train_scaled = scaler.fit_transform(X_train)

        from sklearn.impute import SimpleImputer
        imputer = SimpleImputer()
        self.objects['imputer'] = imputer
        X_train_imputed = imputer.fit_transform(X_train_scaled)

        X_train = pd.DataFrame(
            data=X_train_imputed,
            columns=X_train.columns,
            index=X_train.index
        )
        return X_train, y_train

    def transform_test_data(self, X_test, y_test):
        """Apply the scaler and imputer fitted on the training split.

        Requires ``transform_training_data`` to have run first, since it
        stores the fitted objects in ``self.objects``.

        Returns
        -------
        tuple
            ``(X_test, y_test)`` where ``X_test`` is a DataFrame with the
            original columns and index; ``y_test`` is returned unchanged.

        Raises
        ------
        KeyError
            If the fitted scaler/imputer are not present in ``self.objects``.
        """
        scaler = self.objects['scaler']
        imputer = self.objects['imputer']
        X_test_scaled = scaler.transform(X_test)
        # Only transform here: re-fitting on the test split would leak test
        # statistics into the imputation and overwrite the training fit.
        X_test_imputed = imputer.transform(X_test_scaled)

        X_test = pd.DataFrame(
            data=X_test_imputed,
            columns=X_test.columns,
            index=X_test.index
        )
        return X_test, y_test
        
# Single loader instance, passed to every make_risk_groups call below.
data_loader = CustomDataLoader(column_selector=column_selector)

In [None]:
# for the future (once the pipeline has separate preprocessing)
# from robotehr.pipelines.supporters.preprocessing import DataLoader
# data_loader = DataLoader.load(predictor_uti.training_configuration.training_pipeline.data_loader_path)

In [None]:
# Figure styling: render all text through LaTeX with sans-serif math.
# Because of `text.usetex`, LaTeX special characters in labels (%, &, #) must
# be backslash-escaped.
plt.rcParams.update({
    'text.usetex': True,
    # Must be a plain string: newer matplotlib no longer accepts a list of
    # preamble lines, while older versions accept a string as well.
    'text.latex.preamble': r'\usepackage[cm]{sfmath}',
    'font.family': 'sans-serif',
    'font.sans-serif': 'cm',
    'font.size': 14,
})

## UTI

In [None]:
# Feature column -> display label for the UTI predictor.
# Only the first six entries are plotted; dict insertion order decides which.
# NOTE(review): 'Left heart ventricular angiopathy' labels an 'LV Angiography'
# column — possibly meant "angiography"; label left unchanged.
columns_uti = {
    'any__Procedure__CPT-4__71020': 'Radiologic chest examination',
    'any__Encounter__Preadmit Testing': 'Preadmit testing',
    'min__labvalue__rbc_blood_cell_from_31_days_before_to_1_days_before': 'Red blood cell count (30d, min)',
    'any__Diagnosis__EPIC__TRANSPLANT ORGAN OFFER': 'Transplant organ offer',
    'any__Diagnosis__ICD-10__N18.6': 'End-stage renal disease',
    'gender_Female': 'Female gender',
    'min__labvalue__ventricular_rate_from_31_days_before_to_1_days_before': 'Ventricular rate (30d, min)',
    'any__Diagnosis__ICD-10__R06.02': 'Shortness of breath',
    'min__labvalue__protein_total-bld_from_31_days_before_to_1_days_before': 'Total protein (blood, 30d, min)',
    'any__Procedure__Cardiac Cath Performed__LV Angiography': 'Left heart ventricular angiopathy',
}

df_uti = make_risk_groups(predictor_uti, data_loader)

# First six (column, label) pairs, in declaration order.
plotted_uti = dict(list(columns_uti.items())[:6])

# NOTE(review): the output path is an absolute, user-specific location.
figure_uti = plot_risk_groups(
    df=df_uti,
    features=list(plotted_uti),
    friendly_names_converter=plotted_uti,
    filename="/home/martet02/figures/new-violins-uti.pdf",
)
figure_uti.show()

## CMV

In [None]:
# Feature column -> display label for the CMV predictor.
# Only the first six entries are plotted; dict insertion order decides which.
temp_columns_cmv = {
    'any__Diagnosis__ICD-10__N18.4': 'Chronic kidney disease',
    'any__Diagnosis__EPIC__ESTABLISHED PATIENT / CHRONIC PROBLEM': 'Chronic illness',
    'any__Diagnosis__ICD-10__I10': 'Essential hypertension',
    #'min__labvalue__nrbc#_from_31_days_before_to_1_days_before': 'Nucleated red blood cells \# (30d, min)',
    #'race_Hispanic or Latino': 'Hispanic ethnicity',
    'any__Diagnosis__ICD-9__R69': 'Unspecified illness',
    'any__Diagnosis__ICD-10__D63.1': 'Anemia in CKD',
    # Raw string: '\&' is an invalid Python escape sequence (warns on newer
    # interpreters); the backslash itself is wanted, to escape '&' for LaTeX.
    # NOTE(review): "urin" looks like a typo for "urinary" — label left unchanged.
    'any__Diagnosis__APRDRG MDC__011': r'Kidney \& urin tract diseases',
    'any__Diagnosis__ICD-10__N18.6': 'End-stage renal disease',
    'any__Diagnosis__IMO__4908': 'Benign hypertension'
}

df_cmv = make_risk_groups(predictor_cmv, data_loader)

# First six (column, label) pairs, in declaration order.
plotted_cmv = dict(list(temp_columns_cmv.items())[:6])

# NOTE(review): the output path is an absolute, user-specific location.
plot_risk_groups(
    df=df_cmv,
    features=list(plotted_cmv),
    friendly_names_converter=plotted_cmv,
    filename="/home/martet02/figures/new-violins-cmv.pdf",
).show()

## OVI

In [None]:
# Feature column -> display label for the OVI predictor.
# Only the first six entries are plotted; dict insertion order decides which.
# Labels containing '\%' are raw strings: '\%' is an invalid Python escape
# sequence (warns on newer interpreters), but the backslash itself is wanted,
# to escape '%' for LaTeX.
columns_ovi = {
    'any__Diagnosis__ICD-9__070.54': 'Chronic hepatitis C',
    'max__labvalue__monocyte_%_from_31_days_before_to_1_days_before': r'Monocyte \% (30d, max)',
    'max__weight__weight_from_31_days_before_to_1_days_before': 'Weight (30d, max)',
    'min__labvalue__monocyte_%_from_331_days_before_to_301_days_before': r'Monocyte \% (330-300d, min)',
    'min__labvalue__white_blood_cell_from_211_days_before_to_181_days_before': 'White blood cell (210-180d, min)',
    'max__labvalue__ast_(sgot)_from_31_days_before_to_1_days_before': 'Aspartate aminotransferase (30d, max)',
    'min__labvalue__ast_(sgot)_from_181_days_before_to_151_days_before': 'Aspartate aminotransferase (180-150d, min)',
    'min__vitalsign__ibex__(t-t)_from_31_days_before_to_1_days_before': 'Tympanic temperature (30d, min)',
    'max__labvalue__pro_time_from_31_days_before_to_1_days_before': 'Prothrombin time (30d, max)',
    'max__labvalue__alt(sgpt)_from_31_days_before_to_1_days_before': 'Alanine transaminase (30d, max)'
}

df_ovi = make_risk_groups(predictor_ovi, data_loader)

# First six (column, label) pairs, in declaration order.
plotted_ovi = dict(list(columns_ovi.items())[:6])

# NOTE(review): the output path is an absolute, user-specific location.
plot_risk_groups(
    df=df_ovi,
    features=list(plotted_ovi),
    friendly_names_converter=plotted_ovi,
    filename="/home/martet02/figures/new-violins-ovi.pdf"
).show()