In [None]:
# Execute the shared default-options notebook before anything else so that
# whatever it sets up (presumably display/plot defaults — confirm) is in
# effect for the cells below.
%run 00_default_options.ipynb

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import morpher.config
from sklearn.model_selection import train_test_split
from sklearn.calibration import calibration_curve, CalibratedClassifierCV

from robotehr.evaluation.interpretation import global_explanation
from robotehr.api.predictor import save_predictor, get_predictor_details
from robotehr.api.training import get_training_configuration
from robotehr.pipelines.supporters.preprocessing import DataLoader
from robotehr.pipelines.supporters.restoration import restore_model
from robotehr.evaluation.plots import calibration_plot
from robotehr.evaluation.risk_groups import make_risk_groups, plot_risk_groups
from robotehr.models.predictor import Predictor
from robotehr.utils import FriendlyNamesConverter

# UTI

In [None]:
# Load the three stored predictors in one go. The ids are positional:
# 20 -> UTI, 19 -> OVI, 17 -> CMV (loaded in exactly that order).
predictor_uti, predictor_ovi, predictor_cmv = (
    Predictor.load(id=predictor_id) for predictor_id in (20, 19, 17)
)

In [None]:
# Calibration plot for the stored UTI predictor, labelled 'UTI'.
calibration_plot(predictor_uti, 'UTI')

In [None]:
# Calibration plot for the stored OVI predictor, labelled 'OVI'.
calibration_plot(predictor_ovi, 'OVI')

In [None]:
def save_calibration_figure(predictor, label, file_stem, output_dir='/home/martet02'):
    """Draw a calibration plot for ``predictor`` and save it as PDF and PNG.

    A fresh figure is created on every call, so each saved file contains the
    plot of exactly the predictor that was passed in.

    Args:
        predictor: predictor object forwarded to ``calibration_plot``.
        label: label forwarded to ``calibration_plot`` as its second argument.
        file_stem: output file name without extension,
            e.g. ``'calibration_plot_ovi'``.
        output_dir: directory both files are written to.
            NOTE(review): the default is an absolute, user-specific path;
            pass another directory when running on a different machine.

    Returns:
        The ``(fig, ax)`` pair that holds the plot.
    """
    fig, ax = plt.subplots(figsize=[8, 6])
    # Tick and axis-label font sizes are configured on the fresh axes before
    # calibration_plot draws into them.
    plt.yticks(rotation=0, fontsize=14)
    plt.xticks(rotation=0, fontsize=14)
    ax.xaxis.label.set_size(18)
    ax.yaxis.label.set_size(18)
    # Global matplotlib setting: affects every legend created from here on.
    plt.rc('legend', fontsize=12)
    calibration_plot(predictor, label, ax=ax, title="")
    for extension in ('pdf', 'png'):
        fig.savefig(f'{output_dir}/{file_stem}.{extension}')
    return fig, ax


# One figure per predictor; label and file name always match the predictor.
save_calibration_figure(predictor_ovi, 'OVI', 'calibration_plot_ovi')
fig, ax = save_calibration_figure(predictor_cmv, 'CMV', 'calibration_plot_cmv')

In [None]:
# Feature-name prefixes of the form "<aggregation>__<entity>__" that the data
# loader should pick up: boolean "any" indicators first, then the min / max
# aggregates of the numeric entities.
regexes = [
    'any__Diagnosis__',
    'any__Procedure__',
    'any__Drug__',
    'any__Material__',
    'any__Encounter__',
    'any__AlcoholUse__',
    'any__DrugUse__',
    'any__TobaccoUse__',
    'min__height__',
    'min__weight__',
    'min__vitalsign__',
    'min__measurement__',
    'min__labvalue__',
    'max__height__',
    'max__weight__',
    'max__vitalsign__',
    'max__measurement__',
    'max__labvalue__',
]

# Single alternation pattern "a|b|c" without a trailing separator.
agg_func_regex = "|".join(regexes)

def prepare_data_function(data, target):
    """Turn a raw patient feature frame into the frame used for risk groups.

    Processing steps, in order:

    1. Drop identifier / demographic columns that are not used as features.
    2. Collapse the raw ``race`` values into the coarse ``RaceType``
       categories (raw values absent from the mapping become missing).
    3. One-hot encode ``gender`` and ``race`` into ``<column>_<category>``
       columns and remove the source columns.
    4. Fill missing values with ``False`` in every column whose name contains
       ``'any'``.

    Scaling and imputation are not applied (see the note before the return).

    Args:
        data: pandas DataFrame holding the columns dropped in step 1 plus
            ``gender`` and ``race``. The frame passed in is left unmodified.
        target: not used by the current implementation; kept so the signature
            stays unchanged for the DataLoader this function is handed to.

    Returns:
        A new DataFrame with the prepared features, indexed like ``data``.

    Raises:
        KeyError: if one of the columns to drop is missing from ``data``.
    """
    # Imported locally so the function carries its own dependencies.
    import pandas as pd
    from sklearn.preprocessing import OneHotEncoder
    from enum import Enum

    # Remove unused features. ``drop`` returns a new frame, so the caller's
    # frame is not mutated and the function can be called repeatedly on it.
    unused_columns = [
        'medical_record_number',
        'mother_account_number',
        'date_of_birth',
        'month_of_birth',
        'patient_ethnic_group',
        'religion',
        'address_zip',
        'deceased_indicator',
        'marital_status_code',
    ]
    data = data.drop(columns=unused_columns)

    class RaceType(str, Enum):
        AFRICAN = 'African'
        AMERICAN_BLACK = 'Black or African-American'
        AMERICAN_NATIVE = 'Native American'
        ASIAN = 'Asian'
        ASIAN_PACIFIC = 'Asian Pacific'
        ASIAN_INDIAN = 'Asian Indian'
        ASIAN_CHINESE = 'Asian Chinese'
        HISPANIC = 'Hispanic or Latino'
        OTHER = 'Other'
        WHITE = 'White'

    # Coarse category -> raw values as they appear in the ``race`` column.
    RACE_MAPPING = {
        RaceType.AFRICAN: [
            'Cape Verdian',
            'Congolese',
            'Eritrean',
            'Ethiopian',
            'Gabonian',
            'Ghanaian',
            'Guinean',
            'Ivory Coastian',
            'Kenyan',
            'Liberian',
            'Madagascar',
            'Malian',
            'Nigerian',
            'Other: East African',
            'Other: North African',
            'Other: South African',
            'Other: West African',
            'Senegalese',
            'Sierra Leonean',
            'Somalian',
            'Sudanese',
            'Tanzanian',
            'Togolese',
            'Ugandan',
            'Zimbabwean'
        ],
        RaceType.AMERICAN_BLACK: [
            'African American (Black)',
            'African-American',
            'Black Or African-American',
            'Black or African - American',
        ],
        RaceType.AMERICAN_NATIVE: [
            'American (Indian/Alaskan)',
            'Native American'
        ],
        RaceType.ASIAN: [
            'Asian',
            'Bangladeshi',
            'Bhutanese',
            'Burmese',
            'Cambodian',
            'Hmong',
            'Indonesian',
            'Japanese',
            'Korean',
            'Laotian',
            'Malaysian',
            'Maldivian',
            'Nepalese',
            'Okinawan',
            'Pakistani',
            'Singaporean',
            'Taiwanese',
            'Thai',
            'Vietnamese',
            'Yapese'
        ],
        RaceType.ASIAN_PACIFIC: [
            'Asian (Pacific Islander)',
            'Carolinian',
            'Chamorro',
            'Chuukese',
            'Fijian',
            'Filipino',
            'Guamanian',
            'Guamanian Or Chamorro',
            'Guamanian or Chamorro',
            'Iwo Jiman',
            'Kiribati',
            'Kosraean',
            'Mariana Islander',
            'Marshallese',
            'Melanesian',
            'Micronesian',
            'Native Hawaiian',
            'New Hebrides',
            'Other Pacific Islander',
            'Pacific Islander',
            'Palauan',
            'Pohnpeian',
            'Polynesian',
            'Saipanese',
            'Samoan',
            'Papua New Guinean',
            'Tahitian',
            'Tokelauan',
            'Tongan'
        ],
        RaceType.ASIAN_INDIAN: [
            'Asian Indian',
            'Sri Lankan',
            'Sri lankan',
            'West Indian'
        ],
        RaceType.ASIAN_CHINESE: [
            'Chinese',
        ],
        RaceType.HISPANIC: [
            'Barbadian',
            'Dominica Islander',
            'Grenadian',
            'Haitian',
            'Hispanic/Latino',
            'Jamaican',
            'St Vincentian',
            'Trinidadian'
        ],
        RaceType.OTHER: [
            '',
            'Aa',
            'Ab',
            'Af',
            'Ag',
            'Ak',
            'Al',
            'Ap',
            'Ar',
            'Av',
            'Ay',
            'B',
            'B1',
            'B2',
            'B3',
            'B4',
            'B5',
            'B6',
            'B7',
            'B8',
            'B9',
            'Ba',
            'Bb',
            'Bc',
            'Bd',
            'Be',
            'Bf',
            'Bg',
            'Bh',
            'Bj',
            'Bk',
            'Bm',
            'Bn',
            'Bo',
            'Bp',
            'Bq',
            'Br',
            'Bs',
            'Bt',
            'Bu',
            'Bv',
            'Bw',
            'Bx',
            'By',
            'Bz',
            'I',
            'MSDW_NOT APPLICABLE',
            'MSDW_OTHER',
            'MSDW_UNKNOWN',
            'NOT AVAILABLE',
            'Non Hispanic',
            'O',
            'Other',
            'Pk',
            'Pl',
            'Pm',
            'Po',
            'Ps',
            'Pv',
            'U',
            'Unk',
            'Unknown',
            'W'
        ],
        RaceType.WHITE: [
            'Caucasian (White)',
            'White'
        ]
    }

    # Value mapping: invert RACE_MAPPING into raw value -> category and apply
    # it; raw values that are not listed above end up as missing.
    race_by_raw_value = {
        raw_value: category
        for category, raw_values in RACE_MAPPING.items()
        for raw_value in raw_values
    }
    data['race'] = (
        data['race']
        .map(race_by_raw_value)
        .astype(pd.api.types.CategoricalDtype(RaceType))
    )

    # One-hot encode the categorical columns.
    for column in ['gender', 'race']:
        # The encoder's default sparse result is densified explicitly instead
        # of passing ``sparse=False``: that keyword was removed in
        # scikit-learn 1.4, while ``.toarray()`` works on old and new releases.
        enc = OneHotEncoder()
        encoded_values = enc.fit_transform(data[[column]]).toarray()
        encoded_columns = pd.DataFrame(
            data=encoded_values,
            columns=[f'{column}_{c}' for c in enc.categories_[0]],
            # Reuse the input index so the join below matches rows by
            # position even when ``data`` is not indexed 0..n-1.
            index=data.index,
        )
        data = data.join(encoded_columns)
        del data[column]

    label_encoded_data = data.copy()

    # Missing "any" indicators are treated as False. The match is a plain
    # substring test on the column name, exactly as before.
    any_columns = label_encoded_data.columns[
        label_encoded_data.columns.str.contains('any')
    ]
    label_encoded_data[any_columns] = label_encoded_data[any_columns].fillna(False)

    # Scaling and imputation are currently disabled. To re-enable them, import
    # Scale / Impute from morpher.jobs (and morpher.config) and apply:
    #   scaled_data, _ = Scale().execute(data=label_encoded_data, target=target)
    #   imputed_data, _ = Impute().execute(data=scaled_data, imputation_method=morpher.config.imputers.DEFAULT)

    return label_encoded_data

# Loader built from the feature-name pattern and the preparation callback;
# it is the object passed to make_risk_groups in the sections below.
data_loader = DataLoader(agg_func_regex, prepare_data_function)

In [None]:
# Risk groups for the UTI predictor. The generic name `predictor` is only
# assigned further down (CMV section), so using it here fails on a fresh
# kernel; the UTI predictor loaded at the top of the notebook is used instead.
df = make_risk_groups(predictor_uti, data_loader)

In [None]:
# Columns to compare across the risk groups: a max-height aggregate and a
# `gender_*` column of the kind produced by the one-hot encoding step in
# prepare_data_function.
features = ['max__height__height_from_361_days_before_to_331_days_before', 'gender_Female',]

In [None]:
# Plot the selected features for the risk groups computed above.
plot_risk_groups(df, features)

# CMV

In [None]:
# Request the training configuration of pipeline 231 for the CMV onset
# target, identified by threshold and window start.
tc = get_training_configuration(
    pipeline_id=231,
    response_type="object",
    config={
        'threshold_numeric': 0.05,
        'window_start_numeric': -361,
        'target': 'cytomegaloviral_disease_onset_from_0_days_after_to_365_days_after'
        }
)

In [None]:
# Restore the model for this configuration with the GBDT algorithm and the
# RANDOM sampler.
restored_model = restore_model(tc, morpher.config.algorithms.GBDT, morpher.config.samplers.RANDOM)

In [None]:
# NOTE(review): save_predictor presumably stores a new predictor each time
# this cell runs — confirm before re-running the whole notebook.
predictor = save_predictor(restored_model, tc, 'cmv, baseline 4 rfe + numeric focus, 0d GBDT', '1.0', response_type="object")

In [None]:
# Load stored predictor 17 (the same id used for predictor_cmv at the top of
# the notebook); this replaces the object returned by save_predictor.
predictor = Predictor.load(id=17)

In [None]:
# The label names the algorithm of the plotted model; the model restored for
# CMV in this section is GBDT, not LR.
# NOTE(review): confirm that stored predictor 17 is that GBDT model.
calibration_plot(predictor, 'GBDT')

In [None]:
# Risk groups for the CMV predictor loaded above.
df = make_risk_groups(predictor, data_loader)

In [None]:
# Global explanation restricted to the FEAT_CONTRIB explainer, requesting
# 30 features.
explanations = global_explanation(predictor, num_features=30, explainers=[morpher.config.explainers.FEAT_CONTRIB])

In [None]:
# Keep the first 15 entries of the FEAT_CONTRIB result
# (presumably ordered by contribution — confirm).
interesting_features = list(explanations[morpher.config.explainers.FEAT_CONTRIB])[:15]

In [None]:
# Plot those features across the CMV risk groups.
plot_risk_groups(df, interesting_features)

# Other viral infections (OVI)

In [None]:
# Request the training configuration of pipeline 196 for the
# other-viral-infection onset target, identified by threshold and window start.
tc = get_training_configuration(
    pipeline_id=196,
    response_type="object",
    config={
        'threshold_numeric': 0.05,
        'window_start_numeric': -331,
        'target': 'other_viral_infection_onset_from_0_days_after_to_365_days_after'
        }
)

In [None]:
# Restore the model for this configuration with the LR algorithm and the
# SMOTE sampler.
restored_model = restore_model(tc, morpher.config.algorithms.LR, morpher.config.samplers.SMOTE)

In [None]:
# NOTE(review): save_predictor presumably stores a new predictor each time
# this cell runs — confirm before re-running the whole notebook.
predictor = save_predictor(restored_model, tc, 'other viral infections, baseline 4 numeric LR, 0d', '1.0', response_type="object")

In [None]:
# Show the id of the predictor returned by save_predictor.
predictor.id

In [None]:
# Fetch stored predictor 16; this replaces the object returned by
# save_predictor above.
# NOTE(review): predictor_ovi at the top of the notebook is loaded with
# id 19 — confirm that 16 is the intended predictor here.
predictor = get_predictor_details(predictor_id=16, response_type="object")

In [None]:
# Calibration plot for this predictor, labelled with the LR algorithm.
calibration_plot(predictor, 'LR')

In [None]:
# Risk groups for the predictor fetched above.
df = make_risk_groups(predictor, data_loader)

In [None]:
# Global explanation restricted to the FEAT_CONTRIB explainer, requesting
# 30 features.
explanations = global_explanation(predictor, num_features=30, explainers=[morpher.config.explainers.FEAT_CONTRIB])

In [None]:
# Keep the first 15 entries of the FEAT_CONTRIB result
# (presumably ordered by contribution — confirm).
interesting_features = list(explanations[morpher.config.explainers.FEAT_CONTRIB])[:15]

In [None]:
# Plot those features across the risk groups.
plot_risk_groups(df, interesting_features)