# main_baseline_new

## The first part:
* **model**: 21 separate sklearn.linear_model.LogisticRegression models, each for one target
* **training data**: 
    * predictors = np.concatenate((**static_feature, temporal_feature, temporal_mask**), 1), shape: [adm_num, 29]
        * **static_feature** : shape: [adm_num, static_feature_dim(5)]
        * **temporal_feature** : shape: [adm_num, temporal_feature_dim(12)], 
      variable "temporal_feature[i,j]" denotes admission i's first non-missing measurement for feature j, if admission i doesn't have non-missing measurement, then temporal_feature[i,j] == 0
        * **temporal_mask** : shape: [adm_num, temporal_feature_dim(12)], temporal_mask[i,j] == 1 if admission i has non-missing measurement for feature j, otherwise, temporal_mask[i,j] == 0
* **labels**:
    * targets = np.concatenate((y_icd9, y_mor), 1), shape: [adm_num, 21]
        * y_icd9: shape: [adm_num, 20]
        * y_mor: shape: [adm_num, 1]

In [71]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score, roc_auc_score
import pandas as pd
pd.set_option('display.max_columns', None)

In [29]:
org_data = np.load("./MIMIC_timeseries/24hours/series/imputed-normed-ep_1_24.npz", allow_pickle=True)
temporal_feature_dim = 12
static_feature_dim = 5
targets_dim = 21
adm_num = len(org_data['ep_tdata'])
temporal_feature = np.zeros((adm_num, temporal_feature_dim))
temporal_mask = np.zeros((adm_num, temporal_feature_dim))
static_feature = np.zeros((adm_num, static_feature_dim))
targets = np.zeros((adm_num, targets_dim))


In [30]:
for i in range(adm_num):
    for j in range(temporal_feature_dim):
        temp_f = org_data["ep_tdata"][i][:, j]
        temp_m = org_data["ep_tdata_masking"][i][:, j]
        if any(temp_m):
            temporal_feature[i, j] = temp_f[temp_m == True][0]
            temporal_mask[i, j] = 1
        else:
            temporal_feature[i, j] = 0
            temporal_mask[i, j] = 0
        

In [40]:
static_feature = org_data["adm_features_all"]
y_icd9 = org_data["y_icd9"]
y_mor = org_data["y_mor"]
targets = np.concatenate((y_icd9, y_mor), 1)
predictors = np.concatenate((static_feature, temporal_feature, temporal_mask), 1)
X_train, X_test, y_train, y_test = train_test_split(predictors, targets, test_size=0.33, random_state=42)

In [45]:
clf_models_list = []
for i in range(targets_dim):
    clf_models_list.append(LogisticRegression(max_iter=1000).fit(X_train, y_train[:, i]))



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [81]:
auprc_lists = np.zeros(targets_dim)
auroc = np.zeros(targets_dim)

for i in range(targets_dim):
    cur_y_true = y_test[:, i]
    cur_y_score = clf_models_list[i].predict_proba(X_test)[:,1] 
    # "y_score" used to compute auprc/auroc is the probability estimates of the positive class
    # through "clf_models_list[i].classes_" we know the order of target class is [0 1], so using [:,1] 
    # will return the probability estimates of the positive class 
    auprc_lists[i] = average_precision_score(cur_y_true, cur_y_score)
    auroc[i] = roc_auc_score(cur_y_true, cur_y_score)

In [73]:
auprc_lists

array([0.44669215, 0.23952274, 0.77001515, 0.47824311, 0.37871609,
       0.32696577, 0.94060067, 0.6018682 , 0.59582595, 0.70220011,
       0.05419829, 0.14603482, 0.23969321, 0.08384789, 0.41713585,
       0.10544544, 0.04146463, 0.50384245, 0.5575825 , 0.36984488,
       0.23998469])

### baseline for auprc of each target is computed as the positive rate in each target's class

In [74]:
baseline_for_auprc = np.zeros(21)
for i in range(targets_dim):
    baseline_for_auprc[i] = np.count_nonzero(y_test[:,i]) / len(y_test[:, i])
baseline_for_auprc

array([0.25238176, 0.17106159, 0.67786662, 0.36415447, 0.31753998,
       0.28317455, 0.82332426, 0.48205172, 0.38618578, 0.38618578,
       0.00425315, 0.10079959, 0.18747873, 0.0358115 , 0.31413746,
       0.08225587, 0.02943178, 0.44683566, 0.46444369, 0.33370194,
       0.10037428])

In [75]:
col_names = []
for i in range(1, 22):
    col_names.append("t"+str(i))
auprc_compares = np.zeros((2, targets_dim))
auprc_compares[0] = baseline_for_auprc
auprc_compares[1] = auprc_lists
auprc = pd.DataFrame(auprc_compares, columns=col_names, index = ['base', "logistic_model"])
auprc = auprc.round(3)
auprc

Unnamed: 0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18,t19,t20,t21
base,0.252,0.171,0.678,0.364,0.318,0.283,0.823,0.482,0.386,0.386,0.004,0.101,0.187,0.036,0.314,0.082,0.029,0.447,0.464,0.334,0.1
logistic_model,0.447,0.24,0.77,0.478,0.379,0.327,0.941,0.602,0.596,0.702,0.054,0.146,0.24,0.084,0.417,0.105,0.041,0.504,0.558,0.37,0.24


In [82]:
auroc_df = pd.DataFrame(auroc.reshape(1, targets_dim), columns=col_names)
auroc_df = auroc_df.round(3)
auroc_df

Unnamed: 0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18,t19,t20,t21
0,0.694,0.609,0.633,0.635,0.576,0.561,0.806,0.63,0.663,0.76,0.94,0.613,0.586,0.686,0.61,0.57,0.619,0.561,0.613,0.548,0.702


## The second part
* **model**: pytorch multi-label binary classification problem 
    * n_classes: 21 (y_icd9: 20, y_mor: 1)
    * n_labels for each class: 2 (0/1)
    * model structure: 2-layer MLP, the first layer's size is the same as the input dim; the output layer's size is the same as targets dim(21) without sigmoid activation function here, because we use BCEWithLogitsLoss as loss function, which combines a Sigmoid layer and the BCELoss in one single class
* **training data**: 
    * input_tensor = torch.cat((static_feature_tensor, temporal_feature_tensor, temporal_mask_tensor), 1)
    all of the three above variables are the same as those in the first model but the dtype is tensor.
    
* **labels**:
    * targets_tensor = torch.tensor(targets)
    same as "targets" in the first model but in the tensor form


In [397]:
import torch.nn as nn
import torch
from torch.utils.data import Dataset, random_split, DataLoader
from torch.nn import BCEWithLogitsLoss
from torch import optim

### variables used below:
* **temporal_feature**: numpy array with shape: (adm_num, temporal_feature_dim(12)) 
* **temporal_mask**: numpy array with shape: (adm_num, temporal_feature_dim(12))
* **static_feature**: numpy array with shape: (adm_num, static_feature_dim(5))

all of the three above variables are the same with those in the first model

In [364]:

temporal_feature = temporal_feature.astype(np.float32)
static_feature = static_feature.astype(np.float32)
temporal_mask = temporal_mask.astype(np.float32)
targets = targets.astype(np.float32)

temporal_feature_tensor = torch.tensor(temporal_feature) # shape: [35623, 12]
temporal_mask_tensor = torch.tensor(temporal_mask) # shape: [35623, 12]
static_feature_tensor = torch.tensor(static_feature) # shape: [35623, 5]
targets_tensor = torch.tensor(targets) # shape: [35623, 21]
input_tensor = torch.cat((static_feature_tensor, temporal_feature_tensor, temporal_mask_tensor), 1) # [35623, 29]


In [365]:
class DatasetForBaselineModel(Dataset):
    def __init__(self, input_tensors, target_tensors):
        self.inputs = input_tensors
        self.targets = target_tensors
    def __len__(self):
        return(len(self.inputs))
    def __getitem__(self, idx):
        cur_input = self.inputs[idx]
        cur_target = self.targets[idx]
        return cur_input, cur_target

In [415]:
class LogisticRegression(nn.Module):
    def __init__(self, temporal_dim, mask_dim, static_dim, targets_dim):
        super(LogisticRegression, self).__init__()
        self.input_dim = temporal_dim + mask_dim + static_dim
        self.targets_dim = targets_dim
        self.logistic_layers = nn.Linear(self.input_dim, self.targets_dim)
        
    def forward(self, input_data, label, pos_weight):
        # input_data: [29]
        # label: [21]
        predict_list = self.logistic_layers(input_data)
        criterion = BCEWithLogitsLoss(pos_weight=pos_weight)
        loss = criterion(predict_list, label)
        return {"predicts": predict_list, "loss": loss}  

In [367]:
dataset_baseline = DatasetForBaselineModel(input_tensor, targets_tensor)
train_size = int(0.7 * len(dataset_baseline))
test_size = len(dataset_baseline) - train_size
train_set, test_set = random_split(dataset_baseline, [train_size, test_size], 
                                   generator=torch.Generator().manual_seed(42))

In [368]:
batch_size = 10
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, drop_last=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True, drop_last=True)

#### targets_pos_weight
Below variable "targets_pos_weight" is the parameter "pos_weight" of torch.nn.BCEWithLogitsLoss loss function

In [412]:
import pandas as pd
targets_df = pd.DataFrame(data=targets, columns=col_names)
targets_label_distribution = np.zeros((2, targets_dim))
for t_id in range(targets_dim):
    targets_label_distribution[0][t_id] = targets_df["t" + str(t_id+1)].value_counts()[0]
    targets_label_distribution[1][t_id] = targets_df["t" + str(t_id+1)].value_counts()[1]


targets_label_distribution_df = pd.DataFrame(data=targets_label_distribution, columns=col_names)
targets_pos_weight = torch.zeros(targets_dim)
for t_id in range(targets_dim):
    targets_pos_weight[t_id] = targets_label_distribution_df["t" + str(t_id+1)][0] / targets_label_distribution_df["t" + str(t_id+1)][1]
print(targets_pos_weight) 

tensor([2.9454e+00, 4.8227e+00, 4.6212e-01, 1.7316e+00, 2.1592e+00, 2.4612e+00,
        2.0596e-01, 1.0781e+00, 1.5713e+00, 1.5389e+00, 2.3649e+02, 8.8679e+00,
        4.3129e+00, 2.6701e+01, 2.1436e+00, 1.0835e+01, 3.2862e+01, 1.2249e+00,
        1.1398e+00, 2.0016e+00, 9.1432e+00])


In [416]:
# training model
temporal_dim = 12
mask_dim = 12
static_dim = 5
targets_dim = 21
model = LogisticRegression(temporal_dim, mask_dim, static_dim, targets_dim)
optimizer_second = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.05, amsgrad=True)
model.train()
for epoch in range(10):
    print("epoch: \n", epoch)
    cum_loss_20000 = 0
    for t, train_batch in enumerate(train_loader):
        input_data = train_batch[0]
        target_data = train_batch[1]  # shape: [10, 21]
#         predict_list = torch.zeros((batch_size, targets_dim), requires_grad=True)
#         for b_id in range(batch_size):
#             predict_list[b_id] = model(input_data[b_id])
        result = model(input_data, target_data, targets_pos_weight)
#         print("target:\n")
#         print(target_data)
#         print("predict:\n")
#         print(predict_list)
#         MTL_loss = criterion(predict_list, target_data)
        optimizer_second.zero_grad()
        result["loss"].backward()
        optimizer_second.step()
        cum_loss_20000 += result["loss"].item()
        if (t+1) % 2000 == 0:
            avg_loss = cum_loss_20000 / 20000
            print("current time: ", (t+1)*batch_size, " current averagetraining loss: ",  avg_loss)
            cum_loss_20000 = 0

        
        
        

epoch: 
 0
current time:  20000  current averagetraining loss:  6.495218864971399
epoch: 
 1
current time:  20000  current averagetraining loss:  0.5843146128237248
epoch: 
 2
current time:  20000  current averagetraining loss:  0.5382390275299549
epoch: 
 3
current time:  20000  current averagetraining loss:  0.5213832790255547
epoch: 
 4
current time:  20000  current averagetraining loss:  0.5229665592849254
epoch: 
 5
current time:  20000  current averagetraining loss:  0.5466823604941368
epoch: 
 6
current time:  20000  current averagetraining loss:  0.5432641554951668
epoch: 
 7
current time:  20000  current averagetraining loss:  0.5541633482813835
epoch: 
 8
current time:  20000  current averagetraining loss:  0.5372722413420677
epoch: 
 9
current time:  20000  current averagetraining loss:  0.5221352175951004


In [418]:
model.eval()
y_true = torch.zeros((test_size, targets_dim))
y_score = torch.zeros((test_size, targets_dim))
with torch.no_grad():
    for t, test_batch in enumerate(test_loader):
        cur_inputs = test_batch[0] # shape:[10, 29]
        cur_targets = test_batch[1] # shape: [10, 21]
        y_true[t*batch_size:(t+1)*batch_size, :] = cur_targets
#         cur_predict = torch.zeros((batch_size, targets_dim))
#         for i in range(batch_size):
#             cur_predict[i] = model(cur_inputs[i])
        result = model(cur_inputs, cur_targets, targets_pos_weight)
        cur_predict = result["predicts"]
        y_score[t*batch_size:(t+1)*batch_size, :] = cur_predict

auprc_list_pytorch_base = np.zeros(targets_dim)
auroc_list_pytorch_base = np.zeros(targets_dim)
for t_id in range(targets_dim):
    cur_y_true = y_true[:, t_id].detach().numpy()
    cur_y_score = y_score[:, t_id].detach().numpy()
    auprc_list_pytorch_base[t_id] = average_precision_score(cur_y_true, cur_y_score)
    auroc_list_pytorch_base[t_id] = roc_auc_score(cur_y_true, cur_y_score)

In [419]:
auprc_compares_pytorch = np.zeros((2, targets_dim))
auprc_compares_pytorch[0] = baseline_for_auprc
auprc_compares_pytorch[1] = auprc_list_pytorch_base
auprc_pytorch = pd.DataFrame(auprc_compares_pytorch, columns=col_names, index = ['base', "linear_model_pytorch"])
auprc_pytorch = auprc_pytorch.round(3)
auprc_pytorch

Unnamed: 0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18,t19,t20,t21
base,0.252,0.171,0.678,0.364,0.318,0.283,0.823,0.482,0.386,0.386,0.004,0.101,0.187,0.036,0.314,0.082,0.029,0.447,0.464,0.334,0.1
linear_model_pytorch,0.408,0.216,0.761,0.432,0.371,0.312,0.938,0.589,0.44,0.696,0.028,0.137,0.234,0.068,0.357,0.107,0.043,0.501,0.543,0.353,0.215


In [420]:
auroc_df_pytorch = pd.DataFrame(auroc_list_pytorch_base.reshape(1, targets_dim), columns=col_names)
auroc_df_pytorch = auroc_df_pytorch.round(3)
auroc_df_pytorch

Unnamed: 0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18,t19,t20,t21
0,0.645,0.597,0.647,0.566,0.574,0.536,0.795,0.631,0.544,0.735,0.932,0.609,0.576,0.674,0.545,0.568,0.586,0.533,0.606,0.518,0.718


## The third part
pytorch baseline model with only static features
* **model**: the model structure is the same as the second model, but the input size for the first layer changed to static feature dim(5)
* **input**: only the static variable
* **targets**: still the 21 targets


In [204]:
class Dataset_LR_only_static(Dataset):
    def __init__(self, static_data, targets):
        self.static_data = static_data
        self.targets = targets
    def __len__(self):
        return len(self.static_data)
    def __getitem__(self, idx):
        cur_static_feature = self.static_data[idx]
        cur_targets = self.targets[idx]
        return cur_static_feature, cur_targets
        
static_feature_tensor = torch.tensor(static_feature) # shape: [35623, 5]
targets_tensor = torch.tensor(targets) # shape: [35623, 21]
dataset_third = Dataset_LR_only_static(static_feature_tensor, targets_tensor)
train_set_third, test_set_third = random_split(dataset_third, [train_size, test_size], 
                                   generator=torch.Generator().manual_seed(42))
train_loader_third = DataLoader(train_set_third, batch_size=batch_size, shuffle=True, drop_last=True)
test_loader_third = DataLoader(test_set_third, batch_size=batch_size, shuffle=True, drop_last=True)

In [422]:
class LR_only_static(nn.Module):
    def __init__(self, static_dim, targets_dim):
        super(LR_only_static, self).__init__()
        self.targets_dim = targets_dim
        self.static_dim = static_dim
        self.LR_layer = nn.Linear(static_dim, self.targets_dim)
                                   
    def forward(self, input_data, targets, pos_weight):
        predict_list = self.LR_layer(input_data)
        criterion = BCEWithLogitsLoss(pos_weight=pos_weight)
        loss = criterion(predict_list, targets)
        return {"predicts": predict_list, "loss": loss}  
    

In [423]:
static_dim = 5
targets_dim = 21
model_third = LR_only_static(static_dim, targets_dim)
optimizer_third = optim.Adam(model_third.parameters(), lr=0.001, weight_decay=0.05, amsgrad=True)
model_third.train()
for epoch in range(10):
    print("epoch: \n", epoch+1)
    cum_loss = 0
    for t, sample in enumerate(train_loader_third):
        input_batch = sample[0] # shape: [10, 5]
        target_batch = sample[1] # shape: [10, 21]
        optimizer_third.zero_grad()
        result = model_third(input_batch, target_batch, targets_pos_weight)
        loss = result["loss"]
        cum_loss += loss.item()
        loss.backward()
        optimizer_third.step()
        if (t+1) % 2000 == 0:
            print("current time: ", ((t+1)*batch_size), "current avg loss: ", cum_loss / 20000)
            cum_loss = 0
            
    
    

epoch: 
 1
current time:  20000 current avg loss:  48.75170938867628
epoch: 
 2
current time:  20000 current avg loss:  0.5878763862788677
epoch: 
 3
current time:  20000 current avg loss:  0.5931688434004784
epoch: 
 4
current time:  20000 current avg loss:  0.5738430133223533
epoch: 
 5
current time:  20000 current avg loss:  0.5957935884594917
epoch: 
 6
current time:  20000 current avg loss:  0.6178610707044602
epoch: 
 7
current time:  20000 current avg loss:  0.5869402446866036
epoch: 
 8
current time:  20000 current avg loss:  0.5875429338991642
epoch: 
 9
current time:  20000 current avg loss:  0.5447198246538639
epoch: 
 10
current time:  20000 current avg loss:  0.5845242021918297


In [425]:
model_third.eval()
y_true = torch.zeros((test_size, targets_dim))
y_score = torch.zeros((test_size, targets_dim))
for t, test_batch in enumerate(test_loader_third):
    cur_inputs = test_batch[0] # shape:[10, 5]
    cur_targets = test_batch[1] # shape: [10, 21]
    y_true[t*batch_size:(t+1)*batch_size, :] = cur_targets
    cur_predict = model_third(cur_inputs, cur_targets, targets_pos_weight)["predicts"]
    y_score[t*batch_size:(t+1)*batch_size, :] = cur_predict
    
auprc_pytorch_only_static = np.zeros(targets_dim)
auroc_pytorch_only_static = np.zeros(targets_dim)
for t_id in range(targets_dim):
    cur_y_true = y_true[:, t_id].detach().numpy()
    cur_y_score = y_score[:, t_id].detach().numpy()
    auprc_pytorch_only_static[t_id] = average_precision_score(cur_y_true, cur_y_score)
    auroc_pytorch_only_static[t_id] = roc_auc_score(cur_y_true, cur_y_score)

# auprc_compares_pytorch = np.zeros((2, targets_dim))
# auprc_compares_pytorch[0] = baseline_for_auprc
# auprc_compares_pytorch[1] = auprc_list_pytorch_base
# auprc_pytorch = pd.DataFrame(auprc_compares_pytorch, columns=col_names, index = ['base', "linear_model_pytorch"])
# auprc_pytorch = auprc_pytorch.round(3)
# auprc_pytorch

In [426]:
auprc_pytorch_only_static

array([0.24844829, 0.16042645, 0.58100458, 0.3391011 , 0.29089721,
       0.28303419, 0.93640251, 0.43833232, 0.35229365, 0.50841817,
       0.02809732, 0.09159839, 0.22770114, 0.05324051, 0.30570282,
       0.08793018, 0.02563605, 0.49508373, 0.53684851, 0.41015563,
       0.07498308])

In [427]:
auroc_pytorch_only_static

array([0.4969658 , 0.50697722, 0.37915694, 0.47107629, 0.43142515,
       0.49602762, 0.7922037 , 0.44747195, 0.47420826, 0.63388616,
       0.93318912, 0.47184383, 0.57413807, 0.65400816, 0.48943537,
       0.52162025, 0.43268394, 0.51960798, 0.60636517, 0.54464194,
       0.36763984])

## The fourth part
* **model**: includes a LSTMCell structrue to get embeddings for temporal features, an one-layer MLP to get the prediction for 21 classes
* **input**:
    * **LSTMCell** : original temporal features on each time stamp's with shape: [batch_size, temporal_feature_dim(12)]
    * **one layer MLP**: final hidden state from LSTMCell model concatenate with static features with shape: [batch_size, final_hidden_embedding_dim+static_dim]
* **targets**: still the 21 targets


In [430]:
from torch.nn import LSTMCell

In [437]:
# variables(tensors) than can be used directly
# static_feature_tensor    # shape: [35623, 5]
# targets_tensor     # shape: [35623, 21]
# org_data = np.load("./MIMIC_timeseries/24hours/series/imputed-normed-ep_1_24.npz", allow_pickle=True)

class Dataset_fourth(Dataset):
    def __init__(self, org_data, static_feature, targets):
        self.org_data = org_data
        self.static_feature = static_feature # [35623, 5]
        self.targets = targets # [35623, 21]
        self.temporal_feature = torch.tensor(self.org_data["ep_tdata"].astype(np.float32)) # [35623, 24, 12]
        self.temporal_mask = torch.tensor(self.org_data["ep_tdata_masking"].astype(np.float32)) # [35623, 24, 12]
        
    def __len__(self):
        return len(self.targets)
    def __getitem__(self, idx):
        cur_temporal_feature = self.temporal_feature[idx] # [24, 12]
        cur_static_feature = self.static_feature[idx] # [5]
        cur_target = self.targets[idx] # [21]
        cur_temporal_mask = self.temporal_mask[idx] # [24, 12]
        return cur_temporal_feature, cur_static_feature, cur_target, cur_temporal_mask
    
dataset_fourth = Dataset_fourth(org_data, static_feature_tensor, targets_tensor)     
train_set_fourth, test_set_fourth = random_split(dataset_fourth, [train_size, test_size], 
                                   generator=torch.Generator().manual_seed(42))

train_loader = DataLoader(train_set_fourth, batch_size=batch_size, shuffle=True, drop_last=True)
test_loader = DataLoader(test_set_fourth, batch_size=batch_size, shuffle=True, drop_last=True)

#### MultiModal Class:
is used to concatenate final hidden state of the temporal features and the static variables and predict the targets

In [454]:
class MultiModal(nn.Module):
    def __init__(self, hidden_dim_lstm, static_feature_dim, targets_dim):
        super(MultiModal, self).__init__()
        self.hidden_dim_lstm = hidden_dim_lstm
        self.static_feature_dim = static_feature_dim
        self.targets_dim = targets_dim
        self.LR_layer = nn.Linear(self.hidden_dim_lstm+self.static_feature_dim, self.targets_dim)
        
    def forward(self, final_hidden_embeddings, statics, targets, pos_weight):
#         print("current final_hidden_embeddings:\n")
#         print(final_hidden_embeddings.shape) # shape: [batch_size, final_hidden_embeddings_dim]
#         print("current statics:\n")
#         print(statics.shape)  # [batch_size, static_size]
        input_data = torch.cat((final_hidden_embeddings, statics), 1) # shape: [batch_size, final_hidden_emb_dim+static_dim]
        predict_list = self.LR_layer(input_data) 
        criterion = BCEWithLogitsLoss(pos_weight=pos_weight)
        loss = criterion(predict_list, targets)
        return {"predicts": predict_list, "loss": loss}

#### BaseModel_Fourth Class :
is used to get final hidden states of the temporal features with batch_size


In [455]:
class BaseModel_Fourth(nn.Module):
    def __init__(self, temporal_feature_dim, hidden_dim_lstm, static_feature_dim, batch_size, seq_len, targets_dim):
        super(BaseModel_Fourth, self).__init__()
        self.batch_size = batch_size
        self.hidden_dim_lstm = hidden_dim_lstm
        self.temporal_feature_dim = temporal_feature_dim
        self.static_feature_dim = static_feature_dim
        self.seq_len = seq_len
        self.targets_dim = targets_dim
        self.initial_hidden_cell =  (torch.randn(self.batch_size, self.hidden_dim_lstm), 
                                     torch.randn(self.batch_size, self.hidden_dim_lstm))
        self.lstm = LSTMCell(self.temporal_feature_dim*2, self.hidden_dim_lstm)
        self.mlp = MultiModal(self.hidden_dim_lstm, self.static_feature_dim, self.targets_dim)
    def forward(self, temporal_features, temporal_masks, static_features, labels, pos_weight):
        input_lstm = torch.cat((temporal_features, temporal_masks), 2) # shape: [batch_size, 24, 12+12]
        h, c = self.initial_hidden_cell
        for t in range(self.seq_len):
            cur_input_lstm = input_lstm[:, t, :] # shape: [batch_size, 12+12]
            h, c = self.lstm(cur_input_lstm, (h, c))
        result = self.mlp(h, static_features, labels, pos_weight)
        return result
        

In [457]:
temporal_feature_dim = 12
hidden_dim_lstm = 30
static_feature_dim = 5
targets_dim = 21
seq_len = 24
#  temporal_feature_dim, hidden_dim_lstm, batch_size, seq_len
model_fourth = BaseModel_Fourth(temporal_feature_dim, hidden_dim_lstm, static_feature_dim, batch_size, seq_len, targets_dim)
optimizer_fourth = optim.Adam(model_fourth.parameters(), lr=0.001, weight_decay=0.05, amsgrad=True)
model_fourth.train()
for epoch in range(10):
    print("epoch: \n", epoch+1)
    cum_loss = 0
    for t, item_train in enumerate(train_loader):
        cur_targets_batch = item_train[2] # shape: [batch_size, 21]
        optimizer_fourth.zero_grad()
        item_train[0][item_train[0] != item_train[0]] = 0
        cur_temporal_features_batch = item_train[0]
        cur_temporal_masks_batch = item_train[3]
        cur_static_features_batch = item_train[1]
#         temporal_features, temporal_masks, static_features, labels, pos_weight)
        cur_result = model_fourth(cur_temporal_features_batch, cur_temporal_masks_batch, cur_static_features_batch, cur_targets_batch, targets_pos_weight) # shape: [batch_size, 21]
        loss = cur_result["loss"]
        cum_loss += loss.item()
        loss.backward()
        optimizer_fourth.step()
        if (t + 1) % 2000 == 0:
            print("current time: ", ((t+1)*batch_size), "current avg loss: ", cum_loss / 20000)
            cum_loss = 0
         

epoch: 
 1
current time:  20000 current avg loss:  7.9764210554718975
epoch: 
 2
current time:  20000 current avg loss:  0.5924440301656723
epoch: 
 3
current time:  20000 current avg loss:  0.6092794606506825
epoch: 
 4
current time:  20000 current avg loss:  0.572734712588787
epoch: 
 5
current time:  20000 current avg loss:  0.5659458463907242
epoch: 
 6
current time:  20000 current avg loss:  0.534124119013548
epoch: 
 7
current time:  20000 current avg loss:  0.548151489174366
epoch: 
 8
current time:  20000 current avg loss:  0.5710092915773392
epoch: 
 9
current time:  20000 current avg loss:  0.5568293450355529
epoch: 
 10
current time:  20000 current avg loss:  0.5810946937799454


In [459]:
model_fourth.eval()
y_true = torch.zeros((test_size, targets_dim))
y_score = torch.zeros((test_size, targets_dim))
with torch.no_grad():
    for t, test_batch in enumerate(test_loader):
        test_batch[0][test_batch[0] != test_batch[0]] = 0
        cur_targets_batch = test_batch[2]
        y_true[t*batch_size: (t+1)*batch_size, :] = cur_targets_batch
        cur_temporal_features = test_batch[0]
        cur_temporal_masks = test_batch[3]
        cur_static_features = test_batch[1]
        cur_predicts_batch = model_fourth(cur_temporal_features, cur_temporal_masks, cur_static_features, cur_targets_batch, targets_pos_weight)["predicts"] # 
#         print("predicts size:\n")
#         print(cur_predicts_batch.shape) # [batch_size, targets_dim]
        y_score[t*batch_size: (t+1)*batch_size, :] = cur_predicts_batch
    
auprc_pytorch_temporal_cat_static = np.zeros(targets_dim)
auroc_pytorch_temporal_cat_static = np.zeros(targets_dim)

for t_id in range(targets_dim):
    cur_y_true = y_true[:, t_id].detach().numpy()
    cur_y_score = y_score[:, t_id].detach().numpy()
    auprc_pytorch_temporal_cat_static[t_id] = average_precision_score(cur_y_true, cur_y_score)
    auroc_pytorch_temporal_cat_static[t_id] = roc_auc_score(cur_y_true, cur_y_score)
     

In [460]:
auprc_pytorch_temporal_cat_static

array([0.24923769, 0.33617761, 0.74845494, 0.40911434, 0.37244693,
       0.2961094 , 0.69645145, 0.52730911, 0.4127813 , 0.53441283,
       0.02706345, 0.11070036, 0.15767061, 0.0334669 , 0.3282593 ,
       0.08124161, 0.04179913, 0.49579534, 0.53556414, 0.41061113,
       0.22106324])

In [461]:
auroc_pytorch_temporal_cat_static

array([0.49894812, 0.69496494, 0.63193605, 0.56214962, 0.57552985,
       0.50377338, 0.2120726 , 0.5547802 , 0.53879243, 0.65011933,
       0.91204847, 0.52949254, 0.43294003, 0.49843517, 0.51214397,
       0.48040042, 0.57202258, 0.5193232 , 0.60636035, 0.54419968,
       0.69767675])

In [464]:
final_auprc_compare = np.zeros((5, targets_dim))
final_auprc_compare[0] = baseline_for_auprc
final_auprc_compare[1] = auprc_lists 
final_auprc_compare[2] = auprc_list_pytorch_base
final_auprc_compare[3] = auprc_pytorch_only_static
final_auprc_compare[4] = auprc_pytorch_temporal_cat_static
auprc_sum = pd.DataFrame(final_auprc_compare, columns=col_names, 
                         index = ['baseline', 'sklearn_LR_simple_input', 'pytorch_LR_simple_input', 'pytorch_LR_static_only', 'pytorch_LR_temporal_cat_static_org_input'])
auprc_sum = auprc_sum.round(3)
auprc_sum

Unnamed: 0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18,t19,t20,t21
baseline,0.252,0.171,0.678,0.364,0.318,0.283,0.823,0.482,0.386,0.386,0.004,0.101,0.187,0.036,0.314,0.082,0.029,0.447,0.464,0.334,0.1
sklearn_LR_simple_input,0.447,0.24,0.77,0.478,0.379,0.327,0.941,0.602,0.596,0.702,0.054,0.146,0.24,0.084,0.417,0.105,0.041,0.504,0.558,0.37,0.24
pytorch_LR_simple_input,0.408,0.216,0.761,0.432,0.371,0.312,0.938,0.589,0.44,0.696,0.028,0.137,0.234,0.068,0.357,0.107,0.043,0.501,0.543,0.353,0.215
pytorch_LR_static_only,0.248,0.16,0.581,0.339,0.291,0.283,0.936,0.438,0.352,0.508,0.028,0.092,0.228,0.053,0.306,0.088,0.026,0.495,0.537,0.41,0.075
pytorch_LR_temporal_cat_static_org_input,0.249,0.336,0.748,0.409,0.372,0.296,0.696,0.527,0.413,0.534,0.027,0.111,0.158,0.033,0.328,0.081,0.042,0.496,0.536,0.411,0.221


In [463]:
final_auroc_compare = np.zeros((4, targets_dim))
final_auroc_compare[0] = auroc
final_auroc_compare[1] = auroc_list_pytorch_base
final_auroc_compare[2] = auroc_pytorch_only_static 
final_auroc_compare[3] = auroc_pytorch_temporal_cat_static
auroc_sum = pd.DataFrame(final_auroc_compare, columns=col_names,
                        index = ['sklearn_LR', 'pytorch_LR', 'pytorch_LR_static_only', 'pytorch_LR_temporal_cat_static'])
auroc_sum = auroc_sum.round(3)
auroc_sum

Unnamed: 0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16,t17,t18,t19,t20,t21
sklearn_LR,0.694,0.609,0.633,0.635,0.576,0.561,0.806,0.63,0.663,0.76,0.94,0.613,0.586,0.686,0.61,0.57,0.619,0.561,0.613,0.548,0.702
pytorch_LR,0.645,0.597,0.647,0.566,0.574,0.536,0.795,0.631,0.544,0.735,0.932,0.609,0.576,0.674,0.545,0.568,0.586,0.533,0.606,0.518,0.718
pytorch_LR_static_only,0.497,0.507,0.379,0.471,0.431,0.496,0.792,0.447,0.474,0.634,0.933,0.472,0.574,0.654,0.489,0.522,0.433,0.52,0.606,0.545,0.368
pytorch_LR_temporal_cat_static,0.499,0.695,0.632,0.562,0.576,0.504,0.212,0.555,0.539,0.65,0.912,0.529,0.433,0.498,0.512,0.48,0.572,0.519,0.606,0.544,0.698
