In [1]:
%load_ext autoreload
%autoreload 2
import sys
import os
import glob
import random
from collections import defaultdict
from pathlib import Path

from IPython.display import display

import pandas as pd

from tqdm import tqdm
import jax
# Set JAX's `jax_platform_name` option so that 'gpu' is the default platform.
# NOTE(review): presumably this needs a GPU-enabled jaxlib; confirm how it
# behaves on CPU-only machines before sharing the notebook.
jax.config.update('jax_platform_name', 'gpu')
# Debugging aid (disabled): uncomment to turn on JAX's `jax_debug_nans` option.
# jax.config.update("jax_debug_nans", True)

In [2]:


# Make the directory two levels above the working directory importable; the
# `lib` package imported right below is presumably located there.
# The membership guard keeps the cell idempotent: re-running it no longer
# appends a duplicate "../.." entry to sys.path each time.
if "../.." not in sys.path:
    sys.path.append("../..")

from lib import utils as U
from lib.ehr.dataset import load_dataset
from lib.ehr.inpatient_interface import Patients

In [3]:
# import logging
# logging.root.level = logging.DEBUG

In [4]:
# from lib.ehr.coding_scheme import MIMIC4Procedures, MIMIC4ProcedureGroups
# from lib.ehr.coding_scheme import MIMIC4Input, MIMIC4InputGroups

# cproc = MIMIC4Procedures()
# cproc_g = MIMIC4ProcedureGroups()
# cinp = MIMIC4Input()
# cinp_g = MIMIC4InputGroups()

In [5]:
# Assign the folder of the dataset to `DATA_DIR`.
import dask

# Resolve the user's home directory. `os.path.expanduser('~')` uses $HOME when
# it is set and otherwise falls back to the platform's own lookup, so HOME is
# always a string. The previous `os.environ.get('HOME')` returned None when
# $HOME was missing, silently producing the bogus path 'None/GP/ehr-data'.
HOME = os.path.expanduser('~')
# Folder holding the EHR data; handed to `U.modified_environ(DATA_DIR=...)`
# when the dataset is loaded.
DATA_DIR = f'{HOME}/GP/ehr-data'
# Absolute path of the parent of the current working directory.
SOURCE_DIR = os.path.abspath("..")

# Load the 'M4ICU' dataset while two temporary settings are active:
#   1. DATA_DIR is passed through `U.modified_environ`
#      (presumably exported as an environment variable for the loader);
#   2. dask is configured to use the 'processes' scheduler.
with U.modified_environ(DATA_DIR=DATA_DIR):
    with dask.config.set(scheduler='processes'):
        m4icu_dataset = load_dataset('M4ICU')
   

In [6]:
# Randomly partition the dataset at cut points 0.8 and 0.9 with a fixed seed
# (42), balanced by 'subjects'. Presumably this yields train/validation/test
# id lists (80%/10%/10%) — splits[0] is what later cells fit on and load;
# TODO confirm against `random_splits`.
splits = m4icu_dataset.random_splits([0.8, 0.9], random_seed=42, balanced='subjects')

In [7]:
# Fit the preprocessing on the first split only, so the other splits do not
# influence the fitted parameters.
preprocessing = m4icu_dataset.fit_preprocessing(splits[0])

In [8]:
# Apply the fitted preprocessing to the dataset. The return value is
# discarded, so this presumably updates `m4icu_dataset` in place — TODO confirm
# (if so, re-running this cell may apply the preprocessing twice).
m4icu_dataset.apply_preprocessing(preprocessing)

In [9]:
# Wrap the preprocessed dataset in the `Patients` inpatient interface used by
# the rest of the notebook.
m4inpatients = Patients(m4icu_dataset)

In [10]:
# Worker count shared by the dask scheduler and `load_subjects`, kept in one
# place so the two settings cannot drift apart.
N_WORKERS = 12

# Alternative (disabled): run dask on a thread pool instead of processes.
#   from concurrent.futures import ThreadPoolExecutor
#   with dask.config.set(pool=ThreadPoolExecutor(N_WORKERS)):
#
# Load only the first 100 subject ids of the first split.
# NOTE(review): `m4inpatients` is rebound to the result of its own method, so
# re-running this cell calls `load_subjects` on the already-loaded object.
with dask.config.set(scheduler='processes', num_workers=N_WORKERS):
    m4inpatients = m4inpatients.load_subjects(splits[0][:100], num_workers=N_WORKERS)

  dob = anchor_date + anchor_age
                            dx_icd10->dx_icd9 Unrecognised t_codes
                            (169):
                            ['041.41', '041.42', '041.43', '041.49', '173.00', '173.01', '173.02', '173.09', '173.10', '173.11', '173.12', '173.19', '173.20', '173.21', '173.22', '173.29', '173.30', '173.31', '173.32', '173.39']...
                            dx_icd10->dx_icd9 Unrecognised s_codes
                            (49910):
                            ['E08.3211', 'E08.3212', 'E08.3213', 'E08.3219', 'E08.3291', 'E08.3292', 'E08.3293', 'E08.3299', 'E08.3311', 'E08.3312', 'E08.3313', 'E08.3319', 'E08.3391', 'E08.3392', 'E08.3393', 'E08.3399', 'E08.3411', 'E08.3412', 'E08.3413', 'E08.3419']...
                            dx_icd10->dx_icd9 Unrecognised t_codes
                            (169):
                            ['041.41', '041.42', '041.43', '041.49', '173.00', '173.01', '173.02', '173.09', '173.10', '173.11', '173.12', '173.19', '173.2

In [11]:
# m4inpatients.size_in_bytes() / 1024 ** 3

In [12]:
# val_batch = m4inpatients.device_batch(splits[1])

In [13]:
# tst_batch = m4inpatients.device_batch(splits[2])

In [14]:
# val_batch.size_in_bytes() / 1024 ** 3, tst_batch.size_in_bytes() / 1024 ** 3

In [15]:
# Build a device batch from the first 32 subject ids of the first split
# (a subset of the 100 subjects loaded above).
batch = m4inpatients.device_batch(splits[0][:32])

Loading to device:   0%|          | 0/32 [00:00<?, ?subject/s]

In [16]:
# Batch footprint as reported by `size_in_bytes()`, converted to GiB
# (1 GiB = 1024 ** 3 bytes).
batch.size_in_bytes() / 1024 ** 3

0.0056365299969911575

In [17]:
# Number of subjects held in the batch.
len(batch.subjects)

32

In [18]:
# Admission count reported by the batch (presumably summed over all of its
# subjects — confirm).
batch.n_admissions()

120

In [19]:
# Segment count reported by the batch.
# NOTE(review): what constitutes a "segment" is defined in `lib.ehr` — confirm.
batch.n_segments()

7622

In [20]:
# Count of observation times reported by the batch (exact definition lives in
# `lib.ehr` — confirm).
batch.n_obs_times()

5826

In [21]:
# import numpy as np
# import matplotlib.pyplot as plt

# a = m4inpatients_jax.obs_coocurrence_matrix
# a = np.array(a)
# plt.imshow(a, cmap='hot', interpolation='nearest')
# plt.show()

In [22]:
# Peek at the `input_` interventions of one example: the first admission of
# the subject whose id sits at position 6 of the first split.
_subject_id = splits[0][6]
_first_admission = batch.subjects[_subject_id].admissions[0]
s = _first_admission.interventions.input_
s

InpatientInput(
  index=i32[100],
  rate=f16[100],
  starttime=f32[100],
  endtime=f32[100],
  size=318
)

In [23]:
# Interval length (in hours, per the method name) for the first 10 subject ids
# of the first split.
batch.interval_hours(splits[0][:10])

7167.583333333333

### التدريب على نموذج المعادلات التفاضلية الاعتيادية العصبية


In [24]:
from lib.ml.in_icenode import InICENODE, InICENODEDimensions
import jax.random as jrandom

In [25]:
# Dimension settings handed to the model. The `state_*` / `*_e` names are
# presumably state and embedding sizes — see `InICENODEDimensions` to confirm.
dims = InICENODEDimensions(
    state_m=15,
    state_dx_e=10,
    state_obs_e=25,
    input_e=10,
    proc_e=10,
    demo_e=5,
    int_e=15,
)

# Fixed PRNG key (seed 0) used when constructing the model.
key = jrandom.PRNGKey(0)

# Instantiate the model with the dataset's coding scheme.
m = InICENODE(
    dims=dims,
    scheme=m4icu_dataset.scheme,
    key=key,
)

In [26]:
# res = m.batch_predict(batch, leave_pbar=True)

In [27]:
from lib.ml import InTrainer, MetricsHistory
from lib.metric import  (CodeAUC, UntilFirstCodeAUC, AdmissionAUC,
                      CodeGroupTopAlarmAccuracy, LossMetric, MetricsCollection)

from lib.ml import MinibatchLogger, EvaluationDiskWriter, ParamsDiskWriter, ConfigDiskWriter

In [28]:
# Training hyperparameters; every key is forwarded to the trainer as a
# keyword argument.
config = dict(
    batch_size=32,
    lr=1e-3,
    epochs=150,
    opt="adam",
    reg_hyperparams=None,
)
# Build the trainer from the hyperparameters above; each `config` key is
# passed as a keyword argument.
trainer = InTrainer(**config)
# Experiment directory name. In this notebook it is only referenced by the
# commented-out disk-writer reporters.
expt_dir = 'inicenode'

In [29]:
# Instantiate each evaluation metric against the loaded patients.
metrics = [
    metric_cls(m4inpatients)
    for metric_cls in (CodeAUC, UntilFirstCodeAUC, AdmissionAUC, LossMetric)
]

# Only the minibatch logger is active. The disk-writing reporters are kept
# here, disabled, for reference:
#   EvaluationDiskWriter(output_dir=expt_dir)
#   ParamsDiskWriter(output_dir=expt_dir)
#   ConfigDiskWriter(output_dir=expt_dir, config=config)
reporters = [MinibatchLogger(config)]

# Note: `metrics` is rebound from the plain list to the collection wrapping it.
metrics = MetricsCollection(metrics)
history = MetricsHistory(metrics)

In [30]:
    
# Re-split, this time over the loaded patients (cut points 0.9 / 0.95,
# balanced by 'admissions'). This overwrites the dataset-level `splits`.
# NOTE(review): no `random_seed` is passed here, unlike the earlier
# dataset-level split — confirm whether this split is reproducible.
splits = m4inpatients.random_splits(
    [0.9, 0.95],
    balanced='admissions',
)

# Run training; the trainer's return value is kept in `res`.
res = trainer(
    m,
    m4inpatients,
    splits=splits,
    history=history,
    reporters=reporters,
)

Loading to device:   0%|          | 0/14 [00:00<?, ?subject/s]

  0%|          | 0/150 [00:00<?, ?Epoch/s]

  0%|          | 0/10 [00:00<?, ?Batch/s]

Loading to device:   0%|          | 0/10 [00:00<?, ?subject/s]

Embedding:   0%|          | 0/10 [00:00<?, ?subject/s]

  0%|          | 0.00/213.48 [00:00<?, ?odeint-days/s]



In [31]:
import jax.tree_util as jtu
import jax.numpy as jnp
import equinox as eqx

def _shape_and_nan_flag(leaf):
    """Return '<shape> <has_nan>' for array leaves and None for anything else."""
    if not eqx.is_array(leaf):
        return None
    has_nan = jnp.any(jnp.isnan(leaf)).item()
    return f'{leaf.shape} {has_nan}'


# Summarise every array leaf of the trained model: its shape, and whether it
# contains at least one NaN.
jtu.tree_map(_shape_and_nan_flag, res['model'])

InICENODE(
  f_emb=InpatientEmbedding(
    f_dx_emb=MLP(
      layers=(
        Linear(
          weight='(50, 17375) True',
          bias='(50,) True',
          in_features=17375,
          out_features=50,
          use_bias=True
        ),
        Linear(
          weight='(10, 50) True',
          bias='(10,) True',
          in_features=50,
          out_features=10,
          use_bias=True
        )
      ),
      activation=None,
      final_activation=None,
      use_bias=True,
      use_final_bias=True,
      in_size=17375,
      out_size=10,
      width_size=50,
      depth=1
    ),
    f_dem_emb=MLP(
      layers=(
        Linear(
          weight='(25, 7) True',
          bias='(25,) True',
          in_features=7,
          out_features=25,
          use_bias=True
        ),
        Linear(
          weight='(5, 25) True',
          bias='(5,) True',
          in_features=25,
          out_features=5,
          use_bias=True
        )
      ),
      activation=None,
    