# Demo Notebook:
## DeepHit - Hypertension



In [1]:
import os
from pathlib import Path
import sys
def prepend_site_packages(site_pkgs):
    """Place ``site_pkgs`` at the front of ``sys.path`` exactly once.

    Re-running the notebook cell must not keep stacking copies of the same
    directory on ``sys.path``, so any existing occurrence is removed before
    the directory is inserted at position 0.

    Args:
        site_pkgs: ``pathlib.Path`` (or string) of a site-packages directory.

    Returns:
        True if the directory exists and is now first on ``sys.path``;
        False if it does not exist (``sys.path`` is left untouched).
    """
    site_pkgs = Path(site_pkgs)
    if not site_pkgs.exists():
        return False
    entry = str(site_pkgs)
    # Remove stale copies first so the operation is idempotent across re-runs,
    # while still guaranteeing the directory has top priority.
    sys.path[:] = [p for p in sys.path if p != entry]
    sys.path.insert(0, entry)
    return True


# The virtual environment is selected by the node type in the BB_CPU
# environment variable. If BB_CPU is unset, `node_type` is None and the
# path below ends in "-None", which lands in the "not found" branch.
node_type = os.getenv('BB_CPU')
venv_dir = f'/rds/homes/g/gaddcz/Projects/CPRD/virtual-envTorch2.0-{node_type}'
venv_site_pkgs = Path(venv_dir) / 'lib' / f'python{sys.version_info.major}.{sys.version_info.minor}' / 'site-packages'
if prepend_site_packages(venv_site_pkgs):
    print(f"Added path '{venv_site_pkgs}' at start of search paths.")
else:
    print(f"Path '{venv_site_pkgs}' not found. Check that it exists and/or that it exists for node-type '{node_type}'.")

%load_ext autoreload
%autoreload 2

Added path '/rds/homes/g/gaddcz/Projects/CPRD/virtual-envTorch2.0-icelake/lib/python3.10/site-packages' at start of search paths.


In [2]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import logging
from tqdm import tqdm
from hydra import compose, initialize
from omegaconf import OmegaConf

from CPRD.examples.modelling.benchmarks.make_method_loaders import get_dataloaders
from CPRD.examples.modelling.benchmarks.DeepHit.train_deephit import run_experiment


# Notebook-wide setup: fix the torch RNG seed and surface INFO-level logs.
torch.manual_seed(1337)
logging.basicConfig(level=logging.INFO)

# Use the GPU when torch can see one, otherwise fall back to the CPU.
# (Override with device = "cpu" if more informative debugging statements are needed.)
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}.")

Using device: cpu.


In [7]:
# ---- Study parameters -------------------------------------------------------
seeds = [4, 5]
sample_sizes = [None]  # 2999, 5296, 9351, 16509, 29148, 51461, 90856, 160407, 283203, 500000, None
bins = 200
learning_rates = [0.001]

# ---- Result containers ------------------------------------------------------
# One entry per sample size; each entry is itself a list with one element per
# (seed, learning rate) run, in the order the runs were executed.
model_names, all_ctd, all_ibs, all_inbll = [], [], [], []

for sample_size in sample_sizes:

    # Per-sample-size accumulators, flushed into the outer lists below.
    seed_model_names, seed_ctd, seed_ibs, seed_inbll = [], [], [], []

    for seed in seeds:
        for lr in learning_rates:
            print(f"\nSeed {seed}, sample size {sample_size}, learning_rate {lr}")

            # Re-seed torch so each run starts from a known RNG state.
            torch.manual_seed(seed)

            # Build the train / validation / test data for this configuration.
            dataset_train, dataset_val, dataset_test, meta_information = get_dataloaders(
                "Hypertension",
                False,
                "deephit",
                sample_size=sample_size,
                seed=seed,
                bins=bins,
            )

            # Fit and evaluate the benchmark; the returned dict is expected to
            # carry the "ctd", "ibs" and "inbll" entries recorded below.
            result_dict = run_experiment(
                dataset_train,
                dataset_val,
                dataset_test,
                meta_information,
                learning_rate=lr,
            )
            print(result_dict)

            # Store a label for the run alongside its three metrics.
            run_label = f"DeepHit-SR-Hypertension-Ns{sample_size}-seed{seed}-lr{lr}"
            seed_model_names.append(run_label)
            seed_ctd.append(result_dict["ctd"])
            seed_ibs.append(result_dict["ibs"])
            seed_inbll.append(result_dict["inbll"])

    # All seeds / learning rates for this sample size are done: file them away.
    model_names.append(seed_model_names)
    all_ctd.append(seed_ctd)
    all_ibs.append(seed_ibs)
    all_inbll.append(seed_inbll)
        


Seed 4, sample size None, learning_rate 0.001
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_Hypertension/benchmark_data/all.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_Hypertension/benchmark_data/all.pickle
0:	[8s / 8s],		train_loss: 0.3298,	val_loss: 0.3227
1:	[8s / 17s],		train_loss: 0.3207,	val_loss: 0.3223
2:	[8s / 26s],		train_loss: 0.3200,	val_loss: 0.3216
3:	[8s / 34s],		train_loss: 0.3196,	val_loss: 0.3216
4:	[8s / 43s],		train_loss: 0.3192,	val_loss: 0.3217
5:	[8s / 51s],		train_loss: 0.3190,	val_loss: 0.3217
6:	[8s / 1m:0s],		train_loss: 0.3189,	val_loss: 0.3216
7:	[8s / 1m:9s],		train_loss: 0.3187,	val_loss: 0.3218
8:	[8s / 1m:17s],		train_loss: 0.3185,	val_loss: 0.3218
9:	[8s / 1m:26s],		train_loss: 0.3183,	val_loss: 0.3221
10:	[8s / 1m:34s],		train_loss: 0.3183,	val_loss: 0.3216
11:	[8s / 1m:43s],		train_loss: 0.3

In [None]:
# Print the run labels followed by each metric collection, one per line.
# All four are nested lists: outer index = sample size, inner index = run.
for collected in (model_names, all_ctd, all_ibs, all_inbll):
    print(collected)

In [None]:
# Debug peek at the first batch only (the loop exits after one iteration).
# NOTE(review): `data_loader_test` is not defined in any cell visible in this
# notebook, so this cell raises NameError on a fresh kernel. It presumably
# refers to a test loader from an earlier session; confirm what it should be
# (possibly `dataset_test` from the study cell) or remove the cell.
for batch in data_loader_test:
    x_train = batch[0]
    print(x_train.shape)
    batch_mean = torch.mean(batch[1])
    print(batch_mean)
    print(batch[2])
    break