In [1]:
import os
import numpy as np
from sklearn.metrics import mean_squared_error
from tqdm import tqdm
from model_synthetic_package import LSTMModel
from train_synthetic_package import trainInitIPTW

# Dataset metadata: fixed dimension constants, handed out as a tuple by get_dim().
# NOTE(review): what each count measures is implied by its name only — confirm
# against the data files before relying on it.
n_X_features, n_X_static_features, n_X_t_types, n_classes = (
    27,  # n_X_features
    12,  # n_X_static_features
    1,   # n_X_t_types
    1,   # n_classes
)

In [2]:
import torch

In [3]:
import torch.nn.functional as F

import torch.optim as optim
from torch.utils import data
import torch.nn as nn

In [7]:
def get_dim():
    """Return the dataset dimension constants as a 4-tuple.

    Returns:
        tuple: ``(n_X_features, n_X_static_features, n_X_t_types, n_classes)``,
        read from the module-level globals of those names at call time.
    """
    dims = (n_X_features, n_X_static_features, n_X_t_types, n_classes)
    return dims


class SyntheticDataset(data.Dataset):
    """Map-style dataset that reads one sample's ``.npy`` files from disk per access.

    For a sample ID ``i`` the files ``{i}.x.npy``, ``{i}.static.npy``,
    ``{i}.a.npy`` and ``{i}.y.npy`` are loaded from the data directory and
    returned as float32 tensors.
    """

    def __init__(self, list_IDs, obs_w, treatment, data_dir=None):
        """Store the sample IDs and settings.

        Args:
            list_IDs: Indexable collection of sample IDs; each ID is formatted
                into the file names.
            obs_w: Stored on the instance; not read by this class
                (presumably the observation-window length — TODO confirm).
            treatment: Stored on the instance; not read by this class.
            data_dir: Optional directory containing the ``.npy`` files. When
                omitted, the module-level ``data_dir`` global is looked up on
                every access, which is the original behaviour. Passing it
                explicitly pins the dataset to one directory even if that
                global is reassigned later.
        """
        self.list_IDs = list_IDs
        self.obs_w = obs_w
        self.treatment = treatment
        self.data_dir = data_dir

    def __len__(self):
        """Return the number of sample IDs."""
        return len(self.list_IDs)

    def _load_tensor(self, ID, suffix):
        """Load ``{ID}.{suffix}.npy`` and return it as a float32 tensor."""
        # Fall back to the notebook-level global only when no directory was bound.
        base_dir = self.data_dir if self.data_dir is not None else data_dir
        # os.path.join works whether or not base_dir ends with a separator.
        array = np.load(os.path.join(base_dir, '{}.{}.npy'.format(ID, suffix)))
        return torch.from_numpy(array.astype(np.float32))

    def __getitem__(self, index):
        """Load the sample at position ``index``.

        Returns:
            tuple: ``(X, X_demo, X_treatment, y)`` as float32 tensors, loaded
            from the ``x``, ``static``, ``a`` and ``y`` files respectively.
        """
        ID = self.list_IDs[index]

        y = self._load_tensor(ID, 'y')
        X_demo = self._load_tensor(ID, 'static')
        X = self._load_tensor(ID, 'x')
        X_treatment = self._load_tensor(ID, 'a')

        return X, X_demo, X_treatment, y

In [5]:
# Default parameters

# -- experiment selection --
treatment_option = 'vaso'
# Each value is substituted into the data-directory and checkpoint names.
gamma_h = (0.1, 0.3, 0.5, 0.7)
observation_window = 12

# -- optimisation --
epochs = 1
batch_size = 128
lr = 1e-3
weight_decay = 1e-5
l1 = 1e-5

# -- model / device --
HIDDEN_SIZE = 32
CUDA = False
cuda_device = 1

# Checkpoint path to resume from. The previous ''.format(treatment_option) had no
# placeholder, so it always evaluated to the empty string.
resume = ''

print('hi')
os.makedirs('model_checkpoints', exist_ok=True)

hi


In [16]:
# Preview the first-column IDs of the rows flagged 1 (the rows the training loop
# uses as its training split).
# NOTE(review): in the visible notebook `train_test_split` is only assigned inside
# the training-loop cell, so on a fresh kernel this cell fails until that loop has run.
train_test_split[np.nonzero(train_test_split == 1)[0]][:, 0]

array([200003, 200019, 200030, ..., 299992, 299995, 299998])

In [17]:
# Train and evaluate one model per value in gamma_h; each value selects its own
# synthetic data directory and checkpoint file.

# Loop-invariant setup, hoisted out of the loop.
n_X_features, n_X_static_features, n_X_fr_types, n_classes = get_dim()
attn_model = 'concat2'
n_Z_confounders = HIDDEN_SIZE


def _ids_for_split(split_table, split_code):
    """Return the first-column entries of the rows of ``split_table`` that contain ``split_code``.

    Mirrors the original selection exactly: the comparison runs over the whole
    table, and a row is returned once per matching cell.
    """
    matching_rows = np.where(split_table == split_code)[0]
    return split_table[matching_rows][:, 0]


for gamma in gamma_h:
    # Kept as a global on purpose: SyntheticDataset looks up `data_dir` when
    # samples are loaded. Keep the trailing slash.
    data_dir = '../data/data_synthetic/data_mimic_mean_syn_{}/'.format(gamma)
    save_model = 'model_checkpoints/mimic-syn-6-7-{}.pt'.format(gamma)
    train_test_split = np.loadtxt(os.path.join(data_dir, 'train_test_split.csv'),
                                  delimiter=',', dtype=int)

    # Split codes used by this notebook: 1 -> train, 2 -> validation, 0 -> test.
    train_iids = _ids_for_split(train_test_split, 1)
    val_iids = _ids_for_split(train_test_split, 2)
    test_iids = _ids_for_split(train_test_split, 0)

    train_dataset = SyntheticDataset(train_iids, observation_window, treatment_option)
    val_dataset = SyntheticDataset(val_iids, observation_window, treatment_option)
    test_dataset = SyntheticDataset(test_iids, observation_window, treatment_option)

    # NOTE(review): validation/test loaders are shuffled as in the original; if the
    # trainer aggregates metrics per batch this makes them vary run to run.
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

    # Reset per gamma so a missing checkpoint can never silently reuse the model
    # trained in the previous iteration.
    model = None
    if resume:
        if os.path.isfile(resume):
            print("=> loading checkpoint '{}'".format(resume))

            # SECURITY: torch.load unpickles the file; only load checkpoints you trust.
            # NOTE(review): recent PyTorch releases default to weights_only=True, which
            # rejects fully pickled modules — confirm how the checkpoint was saved.
            model = torch.load(resume)
            if CUDA:
                model = model.cuda()

            print("=> loaded checkpoint '{}'"
                  .format(resume))

        else:
            print("=> no checkpoint found at '{}'".format(resume))

    if model is None:
        # No resume path given, or the checkpoint was missing: start from scratch.
        # NOTE(review): the literals 12 and 128 presumably mirror observation_window
        # and batch_size — confirm against LSTMModel's signature before replacing them.
        model = LSTMModel(n_X_features, n_X_static_features, n_X_fr_types, n_Z_confounders,
                          attn_model, n_classes, 12,
                          128, hidden_size=HIDDEN_SIZE)

    adam_optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    model = trainInitIPTW(train_loader, val_loader, test_loader,
                          model, epochs=epochs,
                          criterion=F.binary_cross_entropy_with_logits, optimizer=adam_optimizer,
                          l1_reg_coef=l1,
                          use_cuda=CUDA,
                          save_model=save_model)

100%|█████████████████████████████████████████████████████████████████| 50/50 [00:16<00:00,  2.98it/s]

Epoch: 0, IPW train loss: 0.6220552289485931
Epoch: 0, Outcome train loss: 0.11779082551598549





Validation:
PEHE: 0.3003	ATE: 0.2446
RMSE: 0.3183

Best model. Saving...

Test:
PEHE: 0.3006	ATE: 0.2428
RMSE: 0.2992

0.30062394192938574
0.24277463818366118
0.29923904


100%|█████████████████████████████████████████████████████████████████| 50/50 [00:18<00:00,  2.65it/s]

Epoch: 0, IPW train loss: 0.5880910068750381
Epoch: 0, Outcome train loss: 0.08143239244818687





Validation:
PEHE: 0.4806	ATE: 0.4188
RMSE: 0.2092

Best model. Saving...

Test:
PEHE: 0.4845	ATE: 0.4235
RMSE: 0.2037

0.4844838121395484
0.4235131926597934
0.20366123


100%|█████████████████████████████████████████████████████████████████| 50/50 [00:22<00:00,  2.25it/s]

Epoch: 0, IPW train loss: 0.6386651742458344
Epoch: 0, Outcome train loss: 0.1108573567122221





Validation:
PEHE: 0.5450	ATE: 0.4703
RMSE: 0.2752

Best model. Saving...

Test:
PEHE: 0.5445	ATE: 0.4728
RMSE: 0.2502

0.5445424873676763
0.472797603707241
0.25020012


100%|█████████████████████████████████████████████████████████████████| 50/50 [00:21<00:00,  2.28it/s]

Epoch: 0, IPW train loss: 0.6490657961368561
Epoch: 0, Outcome train loss: 0.11887206986546517





Validation:
PEHE: 0.2554	ATE: 0.2044
RMSE: 0.1929

Best model. Saving...

Test:
PEHE: 0.2541	ATE: 0.1998
RMSE: 0.1887

0.25412330157722635
0.1997513266103685
0.18868367
