In [9]:
from pyhealth.datasets import MIMIC3Dataset
from pyhealth.datasets import split_by_patient, get_dataloader
from pyhealth.models import Transformer, RNN, RETAIN, MLP
from pyhealth.tasks import mortality_prediction_mimic3_fn, readmission_prediction_mimic3_fn, drug_recommendation_mimic3_fn, length_of_stay_prediction_mimic3_fn
from pyhealth.trainer import Trainer

dataset = MIMIC3Dataset(
    root='https://storage.googleapis.com/pyhealth/Synthetic_MIMIC-III/',
    tables=["DIAGNOSES_ICD", "PROCEDURES_ICD", "PRESCRIPTIONS"],
    code_mapping={
        # "ICD9CM": "CCSCM", 
        # "ICD9PROC": "CCSPROC",
        "NDC": ("ATC", {"target_kwargs": {"level": 3}})
        },
    dev=True,
    refresh_cache=True
)


INFO: Pandarallel will run on 16 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.
finish basic patient information parsing : 1.8765437602996826s
finish parsing DIAGNOSES_ICD : 1.5745110511779785s
finish parsing PROCEDURES_ICD : 1.207517147064209s
finish parsing PRESCRIPTIONS : 7.090853214263916s


Mapping codes: 100%|██████████| 1000/1000 [00:00<00:00, 2154.43it/s]


In [10]:
mimic3_ds = dataset.set_task(readmission_prediction_mimic3_fn)

train_dataset, val_dataset, test_dataset = split_by_patient(
    mimic3_ds, [0.8, 0.1, 0.1]
)
train_dataloader = get_dataloader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = get_dataloader(val_dataset, batch_size=32, shuffle=False)
test_dataloader = get_dataloader(test_dataset, batch_size=32, shuffle=False)

Generating samples for readmission_prediction_mimic3_fn: 100%|██████████| 1000/1000 [00:00<00:00, 404933.77it/s]


In [11]:
model_w_pre = Transformer(
    dataset=mimic3_ds,
    feature_keys=["conditions", "procedures", "drugs"],
    label_key="label",
    mode="binary",
    pretrained_emb="KG/transe",
    embedding_dim=256,
)

Loading pretrained embedding for conditions...
Loading pretrained embedding for procedures...
Loading pretrained embedding for drugs...


In [12]:
# STEP 4: define trainer
trainer = Trainer(model=model_w_pre)
trainer.train(
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    epochs=15,
    optimizer_params = {"lr": 1e-4},
    monitor="pr_auc",
)

# STEP 5: evaluate
print(trainer.evaluate(test_dataloader))

Transformer(
  (embeddings): ModuleDict(
    (conditions): Embedding(303, 512, padding_idx=0)
    (procedures): Embedding(101, 512, padding_idx=0)
    (drugs): Embedding(125, 512, padding_idx=0)
  )
  (linear_layers): ModuleDict(
    (conditions): Linear(in_features=512, out_features=256, bias=True)
    (procedures): Linear(in_features=512, out_features=256, bias=True)
    (drugs): Linear(in_features=512, out_features=256, bias=True)
  )
  (transformer): ModuleDict(
    (conditions): TransformerLayer(
      (transformer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadedAttention(
            (linear_layers): ModuleList(
              (0-2): 3 x Linear(in_features=256, out_features=256, bias=False)
            )
            (output_linear): Linear(in_features=256, out_features=256, bias=False)
            (attention): Attention()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (feed_forward): PositionwiseFeedForward(
            (w

Epoch 0 / 15: 100%|██████████| 1/1 [00:00<00:00, 101.48it/s]

--- Train epoch-0, step-1 ---
loss: 2.8671



Evaluation: 100%|██████████| 1/1 [00:00<00:00, 343.01it/s]


ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

: 

In [7]:
model_no_pre = Transformer(
    dataset=mimic3_ds,
    feature_keys=["conditions", "procedures", "drugs"],
    label_key="label",
    mode="binary",
    embedding_dim=256,
)

In [8]:
# STEP 4: define trainer
trainer = Trainer(model=model_no_pre)
trainer.train(
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    epochs=15,
    optimizer_params = {"lr": 1e-4},
    monitor="pr_auc",
)

# STEP 5: evaluate
print(trainer.evaluate(test_dataloader))

Transformer(
  (embeddings): ModuleDict(
    (conditions): Embedding(303, 256, padding_idx=0)
    (procedures): Embedding(101, 256, padding_idx=0)
    (drugs): Embedding(125, 256, padding_idx=0)
  )
  (linear_layers): ModuleDict()
  (transformer): ModuleDict(
    (conditions): TransformerLayer(
      (transformer): ModuleList(
        (0): TransformerBlock(
          (attention): MultiHeadedAttention(
            (linear_layers): ModuleList(
              (0-2): 3 x Linear(in_features=256, out_features=256, bias=False)
            )
            (output_linear): Linear(in_features=256, out_features=256, bias=False)
            (attention): Attention()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (feed_forward): PositionwiseFeedForward(
            (w_1): Linear(in_features=256, out_features=1024, bias=True)
            (w_2): Linear(in_features=1024, out_features=256, bias=True)
            (dropout): Dropout(p=0.5, inplace=False)
            (activation): 

Epoch 0 / 15: 100%|██████████| 1/1 [00:00<00:00, 83.17it/s]

--- Train epoch-0, step-1 ---
loss: 1.7021



Evaluation: 100%|██████████| 1/1 [00:00<00:00, 343.15it/s]


ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.