In [1]:
%load_ext autoreload
%autoreload 2
import sys
import os
import glob
import random
from collections import defaultdict
from pathlib import Path

from IPython.display import display

import pandas as pd
import dask

from tqdm import tqdm
import jax
# Select the CPU platform for JAX in this notebook.
jax.config.update('jax_platform_name', 'cpu')
# Optional JAX debugging / precision switches, currently disabled:
# jax.config.update('jax_log_compiles', True)
# jax.config.update("jax_debug_nans", True)
# jax.config.update("jax_enable_x64", True)

In [2]:


# Make the directory two levels above the working directory importable; the
# `lib` imports that follow presumably resolve through it.
# Guarded so that re-running this cell does not stack duplicate entries.
if "../.." not in sys.path:
    sys.path.append("../..")

from lib import utils as U
from lib.ehr import load_dataset, load_dataset_scheme, load_dataset_config, Dataset
from lib.ehr.interface import Patients, InterfaceConfig
from lib.ehr.concepts import DemographicVectorConfig, LeadingObservableConfig


In [3]:
import logging

# Raise the root logger's verbosity to INFO through the public setter.
# Assigning `logging.root.level` directly skips what `Logger.setLevel` does:
# it validates the level and clears the logging manager's cached
# `isEnabledFor` results, so stale decisions are not reused.
logging.getLogger().setLevel(logging.INFO)

In [4]:
# tag = 'M4ICU'
# PATH = f'{os.environ.get("HOME")}/GP/ehr-data/mimic4icu-cohort'
# sample = 300
# offset = 50
# cache =  f'cached_inteface/patients_{tag}_S{sample}_O{offset}'
# dataset_config = load_dataset_config(tag, 
#                                      sample=sample,
#                                      offset=offset,
#                                      path=PATH)

In [5]:
# Dataset selection and sub-sampling settings handed to `load_dataset_config`.
tag = 'M4ICU'
# Resolve the home directory through pathlib: `os.environ.get("HOME")` returns
# None when HOME is unset, which silently produced the bogus path 'None/GP/...'.
# `Path.home()` raises instead when the home directory cannot be determined.
PATH = f'{Path.home()}/GP/ehr-data/mimic4icu-cohort'
sample = 30
offset = 50
# Presumably hours, judging by the 'h' suffix in the cache name — TODO confirm.
time_binning = 6
# time_binning = None

# Cache location, keyed by every setting above so different settings do not collide.
cache =  f'cached_inteface/patients_{tag}_S{sample}_O{offset}_TB{time_binning}h'
# cache =  f'cached_inteface/patients_{tag}_S{sample}_O{offset}'
dataset_config = load_dataset_config(tag,
                                     sample=sample,
                                     offset=offset,
                                     path=PATH)

In [6]:
# Display the cache location assembled in the previous cell.
cache

'cached_inteface/patients_M4ICU_S30_O50_TB6h'

##### Possible Interface Scheme Configurations

In [7]:
import json

# Fetch the scheme object for the selected dataset tag and pretty-print the
# target-scheme options it reports as supported (keys sorted, 4-space indent).
dataset_scheme = load_dataset_scheme(tag)
interface_schem_options = dataset_scheme.supported_target_scheme_options
print(
    json.dumps(
        interface_schem_options,
        indent=4,
        sort_keys=True,
    )
)

{
    "dx": [
        "DxFlatCCS",
        "DxCCS",
        "DxICD10",
        "DxICD9"
    ],
    "ethnicity": [
        "MIMIC4Eth32",
        "MIMIC4Eth5"
    ],
    "gender": [
        "Gender"
    ],
    "int_input": [
        "MIMICInput",
        "MIMICInputGroups"
    ],
    "int_proc": [
        "MIMICProcedures",
        "MIMICProcedureGroups"
    ],
    "obs": [
        "MIMICObservables"
    ],
    "outcome": [
        "dx_icd9_filter_v3_groups",
        "dx_flatccs_mlhc_groups",
        "dx_icd9_filter_v1",
        "dx_flatccs_filter_v1",
        "dx_icd9_filter_v2_groups"
    ]
}


#### Leading Observable for Early Prediction Task

In [8]:
# Tabulate the observation scheme and show the rows whose description mentions 'aki'.
scheme_df = dataset_scheme.obs.as_dataframe()
display(scheme_df.loc[scheme_df['desc'].str.contains('aki')])

Unnamed: 0,code,desc
42,o42,aki_stage_smoothed


In [9]:

# Attributes switched on for the demographic vector.
demographic_vector_conf = DemographicVectorConfig(age=True,
                                                  gender=True,
                                                  ethnicity=True)

# Leading observable for the early-prediction task: entry 42 of the observation
# scheme, with 'max' window aggregation over the listed lead horizons.
# NOTE(review): 42 matches the `aki_stage_smoothed` row displayed earlier — confirm
# that `index` refers to that row position.
leading_AKI = LeadingObservableConfig(
    leading_hours=[6, 12, 24, 48, 72],
    window_aggregate='max',
    scheme=dataset_scheme.obs,
    index=42,
)

In [10]:
# Pick one target scheme per field from the supported options printed earlier.
interface_scheme = dataset_scheme.make_target_scheme_config(
    dx='DxICD9',
    outcome='dx_icd9_filter_v3_groups',
    ethnicity='MIMIC4Eth5',
)

# Gather the scheme choices, demographic vector, leading observable, time
# binning and cache location into a single interface configuration.
interface_config = InterfaceConfig(
    scheme=interface_scheme,
    dataset_scheme=dataset_scheme,
    demographic_vector=demographic_vector_conf,
    leading_observable=leading_AKI,
    time_binning=time_binning,
    cache=cache,
)

### Training the Neural Ordinary Differential Equation (Neural ODE) Model


In [11]:
from lib.ml import (InICENODE, InICENODEConfig, InpatientEmbeddingConfig,  
                    SplitConfig, InpatientLiteEmbeddingConfig, DeepMindPatientEmbeddingConfig,
                    InTrainer, TrainerConfig, TrainerReporting, OptimizerConfig, WarmupConfig, ReportingConfig,
                   InGRU, InGRUJump, InICENODELite, InICENODELiteConfig)
from lib.metric import  (CodeAUC, UntilFirstCodeAUC, AdmissionAUC, CodeLevelMetricConfig, MetricLevelsConfig,
                         LossMetricConfig, CodeGroupTopAlarmAccuracy, LossMetric, ObsCodeLevelLossMetric, 
                         CodeGroupTopAlarmAccuracyConfig, LeadingObsTrends, 
                        AKISegmentedAdmissionMetric, AKISegmentedAdmissionConfig,
                        LeadingPredictionAccuracyConfig, LeadingPredictionAccuracy)
from lib.ml import Experiment, InpatientExperiment, ExperimentConfig, SplitConfig

import jax.random as jrandom

In [12]:
# emb_dims = InpatientEmbeddingConfig(dx=30, inp=15, proc=15, 
#                                         demo=5, 
#                                         inp_proc_demo=10)
# model_config = InICENODEConfig(mem=15, obs=25, lead=5, emb=emb_dims,
#                               lead_predictor='mlp')
# model_classname = InICENODE.__name__

In [13]:
# emb_dims = InpatientEmbeddingConfig(dx=30, inp=15, proc=15, 
#                                         demo=5, 
#                                         inp_proc_demo=10)
# model_config = InICENODEConfig(mem=15, obs=25, lead=5, emb=emb_dims,
#                               lead_predictor='mlp')
# model_classname = InICENODELite.__name__

In [14]:
# emb_dims = InpatientLiteEmbeddingConfig(dx=30, demo=5)
# model_config = InICENODELiteConfig(mem=15, obs=25, lead=5, emb=emb_dims,
#                               lead_predictor='mlp')
# model_classname = InGRUJump.__name__

In [16]:
# Embedding sizes and model configuration for this run; the model class is
# passed on by name through `model_classname`.
# NOTE(review): `InGRU` is paired with an `InICENODELiteConfig` here, and the saved
# `expt_config.model` output further down lists `mem`/`obs`/`lead` fields rather
# than `state` — that output looks like it came from an earlier version of this
# cell. Confirm `state=50` is accepted by the current `InICENODELiteConfig`.
emb_dims = DeepMindPatientEmbeddingConfig(dx=30, demo=5, obs=20, sequence=50)
model_config = InICENODELiteConfig(state=50, emb=emb_dims,
                                   lead_predictor='mlp')
model_classname = InGRU.__name__

In [17]:
# Main training settings: Adam at lr=1e-3, 3 epochs, batches of 64, and one
# loss name per prediction target (dx / obs / lead).
trainer_config = TrainerConfig(
    optimizer=OptimizerConfig(opt='adam', lr=1e-3),
    epochs=3,
    batch_size=64,
    dx_loss='balanced_focal_bce',
    obs_loss='mse',
    lead_loss='mse',
)

# Warm-up settings.
# NOTE(review): the experiment config built later passes `warmup=None`, so this
# object appears to be unused in the current run — confirm that is intended.
warmup = WarmupConfig(
    epochs=0.1,
    batch_size=8,
    opt='adam',
    lr=1e-3,
    decay_rate=0.5,
)




In [18]:
# Loss names handed to LossMetricConfig, one list per prediction target.
dx_loss = [
    "softmax_bce",
    "balanced_focal_softmax_bce",
    "balanced_focal_bce",
    "allpairs_exp_rank",
    "allpairs_hard_rank",
    "allpairs_sigmoid_rank",
]
lead_loss = [
    "mse",
    "mae",
    "rms",
    "softdtw(0.1)",
]
obs_loss = [
    "mse",
    "mae",
    "rms",
]

# (metric class, metric config) pairs; the commented entries are switched off.
metrics_conf = [
    # (CodeAUC, CodeLevelMetricConfig(aggregate_level=True, code_level=True)),
    # (AdmissionAUC, MetricLevelsConfig(admission=False, aggregate=True, subject_aggregate=False)),
    # (CodeGroupTopAlarmAccuracy, CodeGroupTopAlarmAccuracyConfig(n_partitions=5, top_k_list=[3, 5, 10, 15, 20])),
    (
        LossMetric,
        LossMetricConfig(dx_loss=dx_loss, lead_loss=lead_loss, obs_loss=obs_loss),
    ),
    # (LeadingObsTrends, CodeLevelMetricConfig(aggregate_level=True, code_level=True)),
    # (AKISegmentedAdmissionMetric, AKISegmentedAdmissionConfig()),
    (
        LeadingPredictionAccuracy,
        LeadingPredictionAccuracyConfig(),
    ),
]
# Replace each pair with whatever the metric class's `export_module_class`
# returns for its config.
metrics_conf = [
    metric_cls.export_module_class(metric_cfg)
    for metric_cls, metric_cfg in metrics_conf
]

In [19]:
# Reporting options for the run; results are directed to the 'mlp_ingru' directory.
reporting_conf = ReportingConfig(
    output_dir='mlp_ingru',
    console=True,
    model_stats=False,
    parameter_snapshots=True,
    config_json=True,
)

In [21]:
# Assemble the full experiment configuration from the pieces defined earlier.
# The split is 80/10/10 train/val/test, with `balanced='admissions'`.
# NOTE(review): `warmup=None` is passed although a `warmup` config object is
# defined in the trainer cell — confirm warm-up is meant to be disabled.
expt_config = ExperimentConfig(
    dataset=dataset_config,
    interface=interface_config,
    split=SplitConfig(train=0.8, val=0.1, test=0.1, balanced='admissions'),
    trainer=trainer_config,
    metrics=metrics_conf,
    reporting=reporting_conf,
    model=model_config,
    model_classname=model_classname,
    n_evals=100,
    continue_training=True,
    warmup=None,
    reg_hyperparams=None,
    model_snapshot_frequency=10,
    trainer_classname='InTrainer',
)

In [22]:
# Build the experiment object from the assembled configuration.
experiment = InpatientExperiment(expt_config)

In [21]:
# IF = experiment.load_interface()
# m = experiment.load_model(IF)

In [22]:
# m._f_emb(IF.subjects['10630336'])

In [23]:
# m._f_emb(IF.subjects[''])

In [24]:
# m._f_init

In [23]:
# Launch the experiment.
# NOTE(review): the saved output below ends in a KeyboardInterrupt raised inside a
# worker process, i.e. the recorded run was interrupted; re-run to completion
# before relying on `result`.
result = experiment.run()

INFO:root:Cache does not match config, ignoring cache.
INFO:root:Loading subjects from scratch.
Process SpawnProcess-12:
Traceback (most recent call last):
  File "/home/asem/GP/env/icenode-dev/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/home/asem/GP/env/icenode-dev/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/asem/GP/env/icenode-dev/lib/python3.9/concurrent/futures/process.py", line 240, in _process_worker
    call_item = call_queue.get(block=True)
  File "/home/asem/GP/env/icenode-dev/lib/python3.9/multiprocessing/queues.py", line 103, in get
    res = self._recv_bytes()
  File "/home/asem/GP/env/icenode-dev/lib/python3.9/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/home/asem/GP/env/icenode-dev/lib/python3.9/multiprocessing/connection.py", line 414, in _recv_bytes
    buf = self._recv(4)
  File "/home/asem/

KeyboardInterrupt: 

In [25]:

# Convert the experiment config to a dict and hand it to `U.write_config`
# with 'conf.json' as the target.
U.write_config(expt_config.to_dict(), 'conf.json')

In [31]:
# Inspect the model section of the experiment config.
# NOTE(review): the saved output below lists `mem`/`obs`/`lead`, whereas the model
# config cell earlier passes `state=50` — the output appears to be stale; re-run.
expt_config.model

InICENODELiteConfig(
  emb=DeepMindPatientEmbeddingConfig(dx=30, demo=5, obs=20, sequence=50),
  mem=15,
  obs=25,
  lead=5,
  lead_predictor='mlp'
)

In [None]:
from pathlib import Path

import pandas as pd

# Load the validation-metrics table saved by a previous training run, using the
# first column as the index. The location is derived from the user's home
# directory (the same '<home>/GP/ehr-data' root that PATH uses earlier in the
# notebook) instead of the hard-coded '/home/asem', so the cell is not tied to
# one machine.
# df = pd.read_csv('/home/asem/GP/ehr-data/m4icu_out/mlp_mse_icenodelite/val_evals.csv.gz')
df = pd.read_csv(Path.home() / 'GP/ehr-data/m4icu_out/mlp_mse_icenode/val_evals.csv.gz', index_col=0)

# df = pd.read_csv('mlp_inicenode/val_evals.csv.gz')

In [None]:
# Group the columns of `df` by a substring of the column name.
auc1_cols = [
    column for column in df.columns
    if 'first_emergence_auc' in column
]
auc2_cols = [
    column for column in df.columns
    if 'AKISegmentedAdmissionMetric.emergence_auc' in column
]
auc3_cols = [
    column for column in df.columns
    if 'all_emergence_auc' in column
]
loss_cols = [
    column for column in df.columns
    if 'Loss' in column
]

In [None]:
# Show the columns whose name contains 'all_emergence_auc'.
df[auc3_cols]

In [None]:
# Elapsed time divided by 3600 — i.e. hours, assuming `elapsed_time` is in seconds (TODO confirm).
df['elapsed_time'] / 3600