In [1]:
import math
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from torch.autograd import Variable
import torch.optim as optim
import copy
from scipy.interpolate import interp1d
dtype = torch.FloatTensor

In [16]:
def sort_data(path):
    data = pd.read_csv(path)
    data.sort_values("OS.time",ascending = False, inplace = True)
    x = data.drop(["Patient_ID", "race_white", "age", "stageh","gradeh", "OS", "OS.time"], axis = 1).values
    ytime = data.loc[:, ["OS.time"]].values
    yevent = data.loc[:, ["OS"]].values
    age = data.loc[:, ["age"]].values
    cstage = data.loc[:, ["stageh"]].values
    hgrade = data.loc[:, ["gradeh"]].values
    race_white = data.loc[:, ["race_white"]].values
    return(x, ytime, yevent, age, cstage, hgrade, race_white)

def load_data(path, dtype):
    x, ytime, yevent, age, cstage, hgrade, race_white = sort_data(path)
    X = torch.from_numpy(x).type(dtype)
    YTIME = torch.from_numpy(ytime).type(dtype)
    YEVENT = torch.from_numpy(yevent).type(dtype)
    AGE = torch.from_numpy(age).type(dtype)
    CSTAGE = torch.from_numpy(cstage).type(dtype)
    HGRADE = torch.from_numpy(hgrade).type(dtype)
    RACE_WHITE = torch.from_numpy(race_white).type(dtype)
    if torch.cuda.is_available():
        X = X.cuda()
        YTIME = YTIME.cuda()
        YEVENT = YEVENT.cuda()
        AGE = AGE.cuda()
        CSTAGE = CSTAGE.cuda()
        HGRADE = HGRADE.cuda()
        RACE_WHITE = RACE_WHITE.cuda()
    return(X, YTIME, YEVENT, AGE, CSTAGE, HGRADE, RACE_WHITE)

In [17]:
class EarlyStopping:
    def __init__(self, patience, verbose=False, delta=0):
        
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.counter % 20 == 0:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), 'checkpoint.pt')
        self.val_loss_min = val_loss

In [25]:
def reconstruction_loss(x, x_recon):
    batch_size = x.size(0)
    assert batch_size != 0
    
    recon_loss = F.mse_loss(x_recon, x, reduction='sum').div(batch_size)

    return recon_loss

def kl_divergence(mu, logvar):
    batch_size = mu.size(0)
    assert batch_size != 0
    
    klds = -0.5*(1 + logvar - mu.pow(2) - logvar.exp())
    total_kld = klds.sum(1).mean(0, True)
    dimension_wise_kld = klds.mean(0)
    mean_kld = klds.mean(1).mean(0, True)

    return total_kld, dimension_wise_kld, mean_kld

In [19]:
def reparametrize(mu, logvar):
    std = logvar.div(2).exp()
    eps = Variable(std.data.new(std.size()).normal_())
    return mu + std*eps

In [20]:
def kaiming_init(m):
    if isinstance(m, nn.Linear):
        init.kaiming_normal_(m.weight)
        if m.bias is not None:
            m.bias.data.fill_(0)

In [21]:
class BetaVAE_H(nn.Module):
    """Model proposed in original beta-VAE paper(Higgins et al, ICLR, 2017). Modifications made to best accommodate our data"""

    def __init__(self, z_dim, input_n):
        super(BetaVAE_H, self).__init__()
        self.z_dim = z_dim
        self.nc = input_n
        self.encoder = nn.Sequential(
            nn.Linear(input_n, 3200),          
            nn.ReLU(True),
            nn.Linear(3200, 800),          
            nn.ReLU(True),
            nn.Linear(800, 200),         
            nn.ReLU(True),
            nn.Linear(200, 50),         
            nn.ReLU(True),
            nn.Linear(50, z_dim*2)            
        )
        self.decoder = nn.Sequential(
            nn.Linear(z_dim, 50),                             
            nn.ReLU(True),
            nn.Linear(50, 200),         
            nn.ReLU(True),
            nn.Linear(200, 800),         
            nn.ReLU(True),
            nn.Linear(800, 3200),      
            nn.ReLU(True),
            nn.Linear(3200, input_n)
        )
        
        self.weight_init()

    def weight_init(self):
        for block in self._modules:
            for m in self._modules[block]:
                kaiming_init(m)

    def forward(self, x):
        distributions = self._encode(x)
        mu = distributions[:, :self.z_dim]
        logvar = distributions[:, self.z_dim:]
        z = reparametrize(mu, logvar)
        x_recon = self._decode(z)

        return x_recon, mu, logvar

    def _encode(self, x):
        return self.encoder(x)

    def _decode(self, z):
        return self.decoder(z)

In [26]:
def trainBetaVAE_H(train_x, eval_x, z_dim, input_n, Learning_Rate, L2, Num_Epochs, patience, beta):
    net = BetaVAE_H(z_dim, input_n)
    
    early_stopping = EarlyStopping(patience = patience, verbose = False)
    
    if torch.cuda.is_available():
        net.cuda()
    opt = optim.Adam(net.parameters(), lr=Learning_Rate, weight_decay = L2)
    for epoch in range(Num_Epochs+1):
        net.train()
        opt.zero_grad()
        
        x_recon, mu, logvar = net(train_x)
        recon_loss = reconstruction_loss(train_x, x_recon)
        total_kld, dim_wise_kld, mean_kld = kl_divergence(mu, logvar)
        beta_vae_loss = recon_loss + beta*total_kld
        
        beta_vae_loss.backward()
        opt.step()
        
        net.eval()
        val_x_recon, val_mu, val_logvar = net(eval_x)
        val_recon_loss = reconstruction_loss(eval_x, val_x_recon)
        val_total_kld, val_dim_wise_kld, val_mean_kld = kl_divergence(val_mu, val_logvar)
        val_loss = val_recon_loss + beta*val_total_kld
        
        early_stopping(val_loss, net)
        if early_stopping.early_stop:
            net.train()
            tr_x_recon, tr_mu, tr_logvar = net(train_x)
            tr_recon_loss = reconstruction_loss(train_x, tr_x_recon)
            tr_total_kld, tr_dim_wise_kld, tr_mean_kld = kl_divergence(tr_mu, tr_logvar)
            tr_loss = tr_recon_loss + beta*tr_total_kld
            print("Early stopping, Number of epochs: ", epoch, ", Loss in Validation: ", val_loss, ", Loss in Training: ", tr_loss)
            break
        if epoch % 200 == 0:
            net.train()
            tr_x_recon, tr_mu, tr_logvar = net(train_x)
            tr_recon_loss = reconstruction_loss(train_x, tr_x_recon)
            tr_total_kld, tr_dim_wise_kld, tr_mean_kld = kl_divergence(tr_mu, tr_logvar)
            tr_loss = tr_recon_loss + beta*tr_total_kld
            print("Number of epochs: ", epoch, ", Loss in Train: ", tr_loss, ", Loss in validation: ", val_loss)
    return (tr_loss, val_loss, tr_mu, tr_logvar, val_mu, val_logvar)

In [34]:
z_dim = 25
input_n = 12332
lr = 0.001
L2_Lambda = [0.1, 0.01, 0.005, 0.001]
patience = 100
beta = 125
num_epochs = 600
Num_EPOCHS = 2000
x_train, ytime_train, yevent_train, age_train, cstage_train, hgrade_train, race_white_train = load_data("D:/DL/Variational autoencoder/Tryout_01_07_2021/divided_data/exp_1/data_tr_1.csv", dtype)
x_valid, ytime_valid, yevent_valid, age_valid, cstage_valid, hgrade_valid, race_white_valid = load_data("D:/DL/Variational autoencoder/Tryout_01_07_2021/divided_data/exp_1/data_val_1.csv", dtype)
x_test, ytime_test, yevent_test, age_test, cstage_test, hgrade_test, race_white_test = load_data("D:/DL/Variational autoencoder/Tryout_01_07_2021/divided_data/exp_1/data_tes_1.csv", dtype)
opt_l2_loss = 0
opt_loss = torch.Tensor([float("Inf")])
if torch.cuda.is_available():
    opt_loss = opt_loss.cuda()
for l2 in L2_Lambda:
    loss_train, loss_valid, tr_mu, tr_logvar, val_mu, val_logvar = trainBetaVAE_H(x_train, x_valid, z_dim, input_n, lr, l2, num_epochs, patience, beta)
    if loss_valid < opt_loss:
        opt_l2_loss = l2
        opt_loss = loss_valid
    print ("L2: ", l2, ", Loss in Validation: ", loss_valid)
loss_train, loss_test, tr_mu, tr_logvar, tes_mu, tes_logvar = trainBetaVAE_H(x_train, x_test, z_dim, input_n, lr, opt_l2_loss, Num_EPOCHS, patience, beta)
print ("Optimal L2: ", opt_l2_loss, ", Optimal Loss in Validation: ", opt_loss)

Number of epochs:  0 , Loss in Train:  tensor([9960807.], grad_fn=<AddBackward0>) , Loss in validation:  tensor([895640.2500], grad_fn=<AddBackward0>)
EarlyStopping counter: 20 out of 100
EarlyStopping counter: 40 out of 100
EarlyStopping counter: 60 out of 100
EarlyStopping counter: 80 out of 100
EarlyStopping counter: 100 out of 100
Early stopping, Number of epochs:  103 , Loss in Validation:  tensor([3.6541e+10], grad_fn=<AddBackward0>) , Loss in Training:  tensor([1.8965e+24], grad_fn=<AddBackward0>)
L2:  0.1 , Loss in Validation:  tensor([3.6541e+10], grad_fn=<AddBackward0>)
Number of epochs:  0 , Loss in Train:  tensor([5.1109e+09], grad_fn=<AddBackward0>) , Loss in validation:  tensor([8007647.], grad_fn=<AddBackward0>)
EarlyStopping counter: 20 out of 100
EarlyStopping counter: 40 out of 100
EarlyStopping counter: 60 out of 100
EarlyStopping counter: 80 out of 100
Number of epochs:  200 , Loss in Train:  tensor([12373.8701], grad_fn=<AddBackward0>) , Loss in validation:  tensor

In [35]:
tr_z = reparametrize(tr_mu, tr_logvar)
tes_z = reparametrize(tes_mu, tes_logvar)

print(tr_z.size())

#np.savetxt("D:/DL/Variational autoencoder/Tryout_01_07_2021/divided_data/exp_1/latent_features_1/beta_125/tr_z_1.csv", tr_z.cpu().detach().numpy(), delimiter = ",")
#np.savetxt("D:/DL/Variational autoencoder/Tryout_01_07_2021/divided_data/exp_1/latent_features_1/beta_125/tes_z_1.csv", tes_z.cpu().detach().numpy(), delimiter = ",")

processed_tr_pre = torch.cat((tr_z, ytime_train, yevent_train, age_train, cstage_train, hgrade_train, race_white_train), 1)
processed_tes_pre = torch.cat((tes_z, ytime_test, yevent_test, age_test, cstage_test, hgrade_test, race_white_test), 1)

processed_tr = pd.DataFrame(processed_tr_pre, columns = ['Z_1', 'Z_2', 'Z_3', 'Z_4', 'Z_5', 'Z_6', 'Z_7', 
                                                         'Z_8', 'Z_9', 'Z_10', 'Z_11', 'Z_12', 'Z_13', 
                                                         'Z_14', 'Z_15', 'Z_16', 'Z_17', 'Z_18', 'Z_19', 
                                                         'Z_20', 'Z_21', 'Z_22', 'Z_23', 'Z_24', 'Z_25', 'OS.time', 'OS.event', 'age', 
                                                         'stageh', 'gradeh', 'race_white'])
processed_tr = processed_tr.astype(float)
processed_tes = pd.DataFrame(processed_tes_pre, columns = ['Z_1', 'Z_2', 'Z_3', 'Z_4', 'Z_5', 'Z_6', 'Z_7', 
                                                           'Z_8', 'Z_9', 'Z_10', 'Z_11', 'Z_12', 'Z_13', 
                                                           'Z_14', 'Z_15', 'Z_16', 'Z_17', 'Z_18', 'Z_19', 
                                                           'Z_20', 'Z_21', 'Z_22', 'Z_23', 'Z_24', 'Z_25', 'OS.time', 'OS.event', 'age', 
                                                           'stageh', 'gradeh', 'race_white'])
processed_tes = processed_tes.astype(float)

torch.Size([269, 25])


In [36]:
print(processed_tes)

         Z_1       Z_2       Z_3       Z_4       Z_5       Z_6       Z_7  \
0   1.494316  0.781556 -1.147801  0.880775 -0.056339 -2.126174  1.255200   
1   0.468572  1.142770  1.508999 -0.311027 -0.234807  0.120927 -0.466899   
2   0.155375 -0.300734  0.066595 -1.085659 -0.875735  0.977980 -0.335594   
3   1.442001  0.395555  1.502475 -1.119425  0.300102  0.869957 -0.616052   
4  -0.735882  1.270305 -0.856642  0.305972  0.653906  0.635487 -0.833827   
..       ...       ...       ...       ...       ...       ...       ...   
90 -0.136664  1.290545  0.885622 -0.702092 -0.376044  0.941989  0.100371   
91 -0.317133  0.984024 -0.199821 -0.189026 -0.229435 -0.934524 -0.305973   
92 -0.751959 -0.076648 -2.176960 -0.503966 -0.923624 -0.376650  0.168631   
93 -1.155667 -1.174169  0.043470  1.023346  0.166031  0.228310 -0.772525   
94  1.546011  0.487233  1.392713  0.020101  0.512275 -0.986085  0.530113   

         Z_8       Z_9      Z_10  ...      Z_22      Z_23      Z_24      Z_25  \
0  -0.

In [31]:
import lifelines
from lifelines import CoxPHFitter

In [38]:
cph = CoxPHFitter(l1_ratio = 1., penalizer = 0.0001)
cph.fit(processed_tes, duration_col='OS.time', event_col='OS.event')
cph.print_summary()

0,1
model,lifelines.CoxPHFitter
duration col,'OS.time'
event col,'OS.event'
penalizer,0.0001
l1 ratio,1
baseline estimation,breslow
number of observations,95
number of events observed,67
partial log-likelihood,-220.56
time fit was run,2021-01-08 07:59:33 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,z,p,-log2(p)
Z_1,-0.09,0.91,0.16,-0.41,0.23,0.66,1.26,-0.56,0.58,0.79
Z_2,0.09,1.09,0.16,-0.22,0.4,0.8,1.49,0.55,0.58,0.78
Z_3,0.03,1.03,0.18,-0.32,0.37,0.73,1.45,0.16,0.88,0.19
Z_4,0.11,1.11,0.17,-0.22,0.44,0.8,1.55,0.64,0.52,0.94
Z_5,0.25,1.28,0.17,-0.1,0.59,0.91,1.8,1.41,0.16,2.65
Z_6,-0.06,0.94,0.18,-0.41,0.3,0.66,1.35,-0.31,0.75,0.41
Z_7,-0.09,0.92,0.19,-0.46,0.29,0.63,1.33,-0.46,0.65,0.63
Z_8,0.72,2.05,0.23,0.28,1.16,1.32,3.2,3.18,<0.005,9.38
Z_9,-0.31,0.73,0.19,-0.68,0.06,0.51,1.06,-1.66,0.10,3.36
Z_10,0.44,1.55,0.16,0.12,0.76,1.13,2.13,2.73,0.01,7.3

0,1
Concordance,0.75
Partial AIC,499.12
log-likelihood ratio test,47.99 on 29 df
-log2(p) of ll-ratio test,6.09


In [39]:
np.savetxt("D:/DL/Variational autoencoder/Tryout_01_07_2021/divided_data/exp_1/latent_features_1/beta_125/tr_comb_z_1.csv", tr_z.cpu().detach().numpy(), delimiter = ",")
np.savetxt("D:/DL/Variational autoencoder/Tryout_01_07_2021/divided_data/exp_1/latent_features_1/beta_125/tes_comb_z_1.csv", tes_z.cpu().detach().numpy(), delimiter = ",")