In [1]:
#Cell 1
import random
import numpy as np
import torch
import os


seed = 1

# Seed the PyTorch, NumPy and built-in Python generators with the same value.
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# Only when CUDA is available: request deterministic cuDNN behaviour, turn off
# its benchmark mode, and seed the CUDA generators (current device, then all).
if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

# NOTE(review): assigning PYTHONHASHSEED after the interpreter has started
# presumably only affects child processes, not this kernel -- confirm.
os.environ["PYTHONHASHSEED"] = f"{seed}"

ModuleNotFoundError: No module named 'torch'

In [None]:
# Set the MEMORY_FS_ROOT environment variable to a directory with sufficient space
#Cell 2
# Create the directory before exporting it: setting the variable does not make
# the path exist, and a missing directory would only fail later when something
# tries to write into it.
memory_fs_root = "/tmp/pandarallel_memory"
os.makedirs(memory_fs_root, exist_ok=True)
os.environ['MEMORY_FS_ROOT'] = memory_fs_root

In [None]:
#Cell 3
from pyhealth.datasets import MIMIC3Dataset


# Build the example dataset from the hosted synthetic MIMIC-III subset using
# three event tables and an ICD9CM -> CCSCM code mapping.
example_tables = ["DIAGNOSES_ICD", "PROCEDURES_ICD", "PRESCRIPTIONS"]
example_dataset = MIMIC3Dataset(
    root="https://storage.googleapis.com/pyhealth/Synthetic_MIMIC-III_subset/",
    tables=example_tables,
    code_mapping=dict(ICD9CM="CCSCM"),
)

In [None]:
# Print summary statistics of the example dataset built in Cell 3.
#Cell 4

example_dataset.stat()

In [None]:
# Access a specific patient by their patient id (here "10025").
# As the last expression of the cell, the looked-up object is displayed.
#Cell 5
example_dataset.patients["10025"]

In [None]:
# Access a specific visit by its visit id (here "110360"), reached through
# the patient it belongs to ("10025").
#Cell 6
example_dataset.patients["10025"].visits["110360"]

In [None]:
# Access all events of a certain type for one visit; the type is given by
# table name (here "DIAGNOSES_ICD", one of the tables loaded in Cell 3).
#Cell 7
example_dataset.patients["10025"].visits["110360"].get_event_list("DIAGNOSES_ICD")

In [None]:
#Cell 8
from pyhealth.datasets import MIMIC3Dataset


"""
TODO 1: Process the Synthetic MIMIC-III Data [20 points]
"""
# Build the dataset used by the TODO cells below. The previous placeholder
# always raised and left `mimic3_dataset` as None, so every later `mimic3_*`
# cell failed.
# NOTE(review): the assignment text is not visible here; this uses the same
# source, tables and code mapping as `example_dataset` -- confirm against the spec.
mimic3_dataset = MIMIC3Dataset(
    root="https://storage.googleapis.com/pyhealth/Synthetic_MIMIC-III_subset/",
    tables=["DIAGNOSES_ICD", "PROCEDURES_ICD", "PRESCRIPTIONS"],
    code_mapping={"ICD9CM": "CCSCM"},
)

In [None]:
#Cell 9
from pyhealth.tasks import mortality_prediction_mimic3_fn


# Apply the mortality prediction task function to the example dataset; the
# returned sample collection is what the later example cells index and split.
example_samples = example_dataset.set_task(mortality_prediction_mimic3_fn)

In [None]:
#Cell 10
# Print summary statistics of the task samples created in Cell 9.
example_samples.stat()

In [None]:
#Cell 11
# Access a specific sample by index: print the first one (index 0).
print(example_samples[0])

In [None]:
#Cell 12
# Print the keys of the first sample, i.e. every field available on it.
print(example_samples[0].keys())

In [None]:
#Cell 13
from pyhealth.tasks import readmission_prediction_mimic3_fn


"""
TODO 2: Set the Readmission Prediction Task [20 points]
"""
# Apply the readmission prediction task function imported above to the
# MIMIC-III dataset, in the same way the example cell applies the mortality
# task. The previous placeholder always raised and left `mimic3_samples` as
# None, which broke the data split further down.
mimic3_samples = mimic3_dataset.set_task(readmission_prediction_mimic3_fn)

In [None]:
#Cell 14
from pyhealth.datasets import split_by_sample, get_dataloader


# Split the example samples 60/20/20 into train, validation and test subsets.
example_train_samples, example_val_samples, example_test_samples = split_by_sample(
    example_samples, [0.6, 0.2, 0.2]
)

# One dataloader per subset, created in train/val/test order with batches of
# 64; only the training loader shuffles.
example_train_loader, example_val_loader, example_test_loader = (
    get_dataloader(subset, batch_size=64, shuffle=is_train)
    for subset, is_train in (
        (example_train_samples, True),
        (example_val_samples, False),
        (example_test_samples, False),
    )
)

In [None]:
#Cell 15
# Split the readmission samples 60/20/20 into train, validation and test subsets.
mimic3_train_samples, mimic3_val_samples, mimic3_test_samples = split_by_sample(
    mimic3_samples, [0.6, 0.2, 0.2]
)

# One dataloader per subset, created in train/val/test order with batches of
# 64; only the training loader shuffles.
mimic3_train_loader, mimic3_val_loader, mimic3_test_loader = (
    get_dataloader(subset, batch_size=64, shuffle=is_train)
    for subset, is_train in (
        (mimic3_train_samples, True),
        (mimic3_val_samples, False),
        (mimic3_test_samples, False),
    )
)

In [None]:
# Each sample in the dataset contains the following information.
# NOTE(review): the feature and label keys passed to the RNN in Cell 17 are
# presumably among the keys printed here -- verify against the output.
#Cell 16
print(example_samples[0].keys())

In [None]:
#Cell 17
from pyhealth.models import RNN


# RNN in binary mode over the example samples, reading three feature keys and
# the "label" key.
example_feature_keys = ["conditions", "procedures", "drugs"]
example_rnn = RNN(
    dataset=example_samples,
    feature_keys=example_feature_keys,
    label_key="label",
    mode="binary",
)

In [None]:
# Architecture of RNN: as the last expression of the cell, the example model
# built in Cell 17 is displayed.
#Cell 18
example_rnn

In [None]:
"""
TODO 3: Build A RNN Model [20 Points]
"""
#Cell 19
# Build the readmission model with the same configuration as `example_rnn`.
# The previous placeholder raised before the print below could run and left
# the model as None.
# NOTE(review): assumes the readmission samples expose the same
# "conditions" / "procedures" / "drugs" / "label" keys as the example samples
# -- confirm with `mimic3_samples[0].keys()`.
mimic3_rnn = RNN(
    dataset=mimic3_samples,
    feature_keys=["conditions", "procedures", "drugs"],
    label_key="label",
    mode="binary",
)

# Print the architecture
print(mimic3_rnn)

In [None]:
#Cell 20
from pyhealth.trainer import Trainer


# Wrap the example RNN in a Trainer that runs on the CPU and is configured
# with the "pr_auc" and "roc_auc" metrics.
example_metrics = ["pr_auc", "roc_auc"]
example_trainer = Trainer(model=example_rnn, metrics=example_metrics, device="cpu")

In [None]:
#Cell 21
# Fit the example model for 20 epochs with Adam. Validation "roc_auc" is the
# monitored metric (higher is better), and the best model is loaded back once
# training finishes.
example_adam_params = dict(lr=1e-3, weight_decay=1e-5)
example_trainer.train(
    train_dataloader=example_train_loader,
    val_dataloader=example_val_loader,
    epochs=20,
    optimizer_class=torch.optim.Adam,
    optimizer_params=example_adam_params,
    max_grad_norm=5.0,  # gradient clipping threshold
    monitor="roc_auc",
    monitor_criterion="max",
    load_best_model_at_last=True,
)

In [None]:
#Cell 22
"""
TODO 4: Train the RNN Model [20 Points]
"""
# Set up the Trainer
# The placeholder `raise` sat inside the call parentheses, which is a
# SyntaxError. The arguments mirror `example_trainer`, applied to the
# readmission model.
mimic3_trainer = Trainer(
    model=mimic3_rnn,
    metrics=["pr_auc", "roc_auc"],
    device="cpu",
)

In [None]:
#Cell 23
"""
TODO 4: Train the RNN Model [20 Points]
"""
# Train the model
# The placeholder `raise` sat inside the call parentheses, which is a
# SyntaxError. The settings mirror the example training run in Cell 21,
# applied to the readmission dataloaders.
mimic3_trainer.train(
    train_dataloader=mimic3_train_loader,  # Training data
    val_dataloader=mimic3_val_loader,  # Validation data
    epochs=20,  # Number of training epochs
    optimizer_class=torch.optim.Adam,  # Optimizer choice
    optimizer_params={"lr": 0.001, "weight_decay": 1e-5},  # Optimizer parameters
    max_grad_norm=5.0,  # Gradient clipping
    monitor="roc_auc",  # Monitor AUC-ROC for best model selection
    monitor_criterion="max",  # Maximize AUC-ROC during training
    load_best_model_at_last=True,  # Automatically load the best-performing model
)

In [None]:
#Cell 24
# Evaluate the trained example model on the test dataloader and print
# whatever `evaluate` returns.
example_results = example_trainer.evaluate(example_test_loader)
print(example_results)

In [None]:
#Cell 25
"""
TODO 5: Evaluate the RNN Model [20 points]
"""

# Evaluate the trained readmission model on the held-out test dataloader,
# mirroring the example evaluation in Cell 24. The previous placeholder raised
# before the print below could run and left the result as None.
mimic3_results = mimic3_trainer.evaluate(mimic3_test_loader)

# Print evaluation metrics
print("MIMIC-III Test Set Evaluation Metrics:", mimic3_results)