In [1]:
import math
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from torch.autograd import Variable
import torch.optim as optim
import copy
from scipy.interpolate import interp1d
dtype = torch.FloatTensor

In [2]:
def sort_data(path):
    data = pd.read_csv(path)
    data.sort_values("OS.time",ascending = False, inplace = True)
    x = data.drop(["Patient_ID", "race_black", "race_white", "age", "stageh","gradeh", "OS", "OS.time"], axis = 1).values
    ytime = data.loc[:, ["OS.time"]].values
    yevent = data.loc[:, ["OS"]].values
    age = data.loc[:, ["age"]].values
    cstage = data.loc[:, ["stageh"]].values
    hgrade = data.loc[:, ["gradeh"]].values
    race_black = data.loc[:, ["race_black"]].values
    race_white = data.loc[:, ["race_white"]].values
    return(x, ytime, yevent, age, cstage, hgrade, race_black, race_white)

def load_data(path, dtype):
    x, ytime, yevent, age, cstage, hgrade, race_black, race_white = sort_data(path)
    X = torch.from_numpy(x).type(dtype)
    YTIME = torch.from_numpy(ytime).type(dtype)
    YEVENT = torch.from_numpy(yevent).type(dtype)
    AGE = torch.from_numpy(age).type(dtype)
    CSTAGE = torch.from_numpy(cstage).type(dtype)
    HGRADE = torch.from_numpy(hgrade).type(dtype)
    RACE_BLACK = torch.from_numpy(race_black).type(dtype)
    RACE_WHITE = torch.from_numpy(race_white).type(dtype)
    if torch.cuda.is_available():
        X = X.cuda()
        YTIME = YTIME.cuda()
        YEVENT = YEVENT.cuda()
        AGE = AGE.cuda()
        CSTAGE = CSTAGE.cuda()
        HGRADE = HGRADE.cuda()
        RACE_BLACK = RACE_BLACK.cuda()
        RACE_WHITE = RACE_WHITE.cuda()
    return(X, YTIME, YEVENT, AGE, CSTAGE, HGRADE, RACE_BLACK, RACE_WHITE)

In [3]:
class EarlyStopping:
    def __init__(self, patience, verbose=False, delta=0):
        
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.counter % 20 == 0:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), 'checkpoint.pt')
        self.val_loss_min = val_loss

In [4]:
def reconstruction_loss(x, x_recon):
    batch_size = x.size(0)
    assert batch_size != 0
    
    recon_loss = F.mse_loss(x_recon, x, size_average=False).div(batch_size)

    return recon_loss

def kl_divergence(mu, logvar):
    batch_size = mu.size(0)
    assert batch_size != 0
    
    klds = -0.5*(1 + logvar - mu.pow(2) - logvar.exp())
    total_kld = klds.sum(1).mean(0, True)
    dimension_wise_kld = klds.mean(0)
    mean_kld = klds.mean(1).mean(0, True)

    return total_kld, dimension_wise_kld, mean_kld

In [5]:
def reparametrize(mu, logvar):
    std = logvar.div(2).exp()
    eps = Variable(std.data.new(std.size()).normal_())
    return mu + std*eps

In [6]:
def kaiming_init(m):
    if isinstance(m, nn.Linear):
        init.kaiming_normal_(m.weight)
        if m.bias is not None:
            m.bias.data.fill_(0)

In [7]:
class BetaVAE_H(nn.Module):
    """Model proposed in original beta-VAE paper(Higgins et al, ICLR, 2017). Modifications made to best accommodate our data"""

    def __init__(self, z_dim, input_n):
        super(BetaVAE_H, self).__init__()
        self.z_dim = z_dim
        self.nc = input_n
        self.encoder = nn.Sequential(
            nn.Linear(input_n, 200),          
            nn.ReLU(True),
            nn.Linear(200, 50),         
            nn.ReLU(True),
            nn.Linear(50, z_dim*2)            
        )
        self.decoder = nn.Sequential(
            nn.Linear(z_dim, 50),                             
            nn.ReLU(True),
            nn.Linear(50, 200),      
            nn.ReLU(True),
            nn.Linear(200, input_n)
        )
        
        self.weight_init()

    def weight_init(self):
        for block in self._modules:
            for m in self._modules[block]:
                kaiming_init(m)

    def forward(self, x):
        distributions = self._encode(x)
        mu = distributions[:, :self.z_dim]
        logvar = distributions[:, self.z_dim:]
        z = reparametrize(mu, logvar)
        x_recon = self._decode(z)

        return x_recon, mu, logvar

    def _encode(self, x):
        return self.encoder(x)

    def _decode(self, z):
        return self.decoder(z)

In [8]:
def trainBetaVAE_H(train_x, eval_x, z_dim, input_n, Learning_Rate, L2, Num_Epochs, patience, beta):
    net = BetaVAE_H(z_dim, input_n)
    
    early_stopping = EarlyStopping(patience = patience, verbose = False)
    
    if torch.cuda.is_available():
        net.cuda()
    opt = optim.Adam(net.parameters(), lr=Learning_Rate, weight_decay = L2)
    for epoch in range(Num_Epochs+1):
        net.train()
        opt.zero_grad()
        
        x_recon, mu, logvar = net(train_x)
        recon_loss = reconstruction_loss(train_x, x_recon)
        total_kld, dim_wise_kld, mean_kld = kl_divergence(mu, logvar)
        beta_vae_loss = recon_loss + beta*total_kld
        
        beta_vae_loss.backward()
        opt.step()
        
        net.eval()
        val_x_recon, val_mu, val_logvar = net(eval_x)
        val_recon_loss = reconstruction_loss(eval_x, val_x_recon)
        val_total_kld, val_dim_wise_kld, val_mean_kld = kl_divergence(val_mu, val_logvar)
        val_loss = val_recon_loss + beta*val_total_kld
        
        early_stopping(val_loss, net)
        if early_stopping.early_stop:
            net.train()
            tr_x_recon, tr_mu, tr_logvar = net(train_x)
            tr_recon_loss = reconstruction_loss(train_x, tr_x_recon)
            tr_total_kld, tr_dim_wise_kld, tr_mean_kld = kl_divergence(tr_mu, tr_logvar)
            tr_loss = tr_recon_loss + beta*tr_total_kld
            print("Early stopping, Number of epochs: ", epoch, ", Loss in Validation: ", val_loss, ", Loss in Training: ", tr_loss)
            break
        if epoch % 200 == 0:
            net.train()
            tr_x_recon, tr_mu, tr_logvar = net(train_x)
            tr_recon_loss = reconstruction_loss(train_x, tr_x_recon)
            tr_total_kld, tr_dim_wise_kld, tr_mean_kld = kl_divergence(tr_mu, tr_logvar)
            tr_loss = tr_recon_loss + beta*tr_total_kld
            print("Loss in Train: ", tr_loss)
    return (tr_loss, val_loss, tr_mu, tr_logvar, val_mu, val_logvar)

In [232]:
z_dim = 10
input_n = 929
Initial_Learning_Rate = [0.03, 0.01, 0.001, 0.00075]
L2_Lambda = [0.1, 0.01, 0.005, 0.001]
patience = 100
beta = 1000
num_epochs = 600
Num_EPOCHS = 2000
x_train, ytime_train, yevent_train, age_train, cstage_train, hgrade_train, race_black_train, race_white_train = load_data("D:/DL/Variational autoencoder/Tryout_12_30_2020/divided_data/exp_20/data_tr_20.csv", dtype)
x_valid, ytime_valid, yevent_valid, age_valid, cstage_valid, hgrade_valid, race_black_valid, race_white_valid = load_data("D:/DL/Variational autoencoder/Tryout_12_30_2020/divided_data/exp_20/data_val_20.csv", dtype)
x_test, ytime_test, yevent_test, age_test, cstage_test, hgrade_test, race_black_test, race_white_test = load_data("D:/DL/Variational autoencoder/Tryout_12_30_2020/divided_data/exp_20/data_tes_20.csv", dtype)
opt_l2_loss = 0
opt_lr_loss = 0
opt_loss = torch.Tensor([float("Inf")])
if torch.cuda.is_available():
    opt_loss = opt_loss.cuda()
for l2 in L2_Lambda:
    for lr in Initial_Learning_Rate:
        loss_train, loss_valid, tr_mu, tr_logvar, val_mu, val_logvar = trainBetaVAE_H(x_train, x_valid, z_dim, input_n, lr, l2, num_epochs, patience, beta)
        if loss_valid < opt_loss:
            opt_l2_loss = l2
            opt_lr_loss = lr
            opt_loss = loss_valid
        print ("L2: ", l2, ", LR: ", lr, ", Loss in Validation: ", loss_valid)
loss_train, loss_test, tr_mu, tr_logvar, tes_mu, tes_logvar = trainBetaVAE_H(x_train, x_test, z_dim, input_n, opt_lr_loss, opt_l2_loss, Num_EPOCHS, patience, beta)
print ("Optimal L2: ", opt_l2_loss, ", Optimal LR: ", opt_lr_loss)



Loss in Train:  tensor([1.5432e+20], grad_fn=<AddBackward0>)
Loss in Train:  tensor([nan], grad_fn=<AddBackward0>)
Loss in Train:  tensor([nan], grad_fn=<AddBackward0>)
Loss in Train:  tensor([nan], grad_fn=<AddBackward0>)
L2:  0.1 , LR:  0.03 , Loss in Validation:  tensor([nan], grad_fn=<AddBackward0>)
Loss in Train:  tensor([608042.], grad_fn=<AddBackward0>)
EarlyStopping counter: 20 out of 100
EarlyStopping counter: 40 out of 100
EarlyStopping counter: 20 out of 100
EarlyStopping counter: 40 out of 100
Loss in Train:  tensor([927.0010], grad_fn=<AddBackward0>)
EarlyStopping counter: 60 out of 100
EarlyStopping counter: 80 out of 100
EarlyStopping counter: 20 out of 100
EarlyStopping counter: 20 out of 100
EarlyStopping counter: 40 out of 100
EarlyStopping counter: 60 out of 100
EarlyStopping counter: 80 out of 100
Loss in Train:  tensor([926.1686], grad_fn=<AddBackward0>)
EarlyStopping counter: 20 out of 100
EarlyStopping counter: 40 out of 100
EarlyStopping counter: 60 out of 100
E

In [233]:
print(tr_mu.size())

torch.Size([270, 10])


In [234]:
tr_z = reparametrize(tr_mu, tr_logvar)
tes_z = reparametrize(tes_mu, tes_logvar)

In [235]:
tr_z.size()

torch.Size([270, 10])

In [236]:
np.savetxt("D:/DL/Variational autoencoder/Tryout_12_30_2020/divided_data/exp_20/latent_features_20/beta_1000/tr_z_20.csv", tr_z.cpu().detach().numpy(), delimiter = ",")
np.savetxt("D:/DL/Variational autoencoder/Tryout_12_30_2020/divided_data/exp_20/latent_features_20/beta_1000/tes_z_20.csv", tes_z.cpu().detach().numpy(), delimiter = ",")

In [237]:
processed_tr_pre = torch.cat((tr_z, ytime_train, yevent_train, age_train, cstage_train, hgrade_train, race_black_train, race_white_train), 1)
processed_tes_pre = torch.cat((tes_z, ytime_test, yevent_test, age_test, cstage_test, hgrade_test, race_black_test, race_white_test), 1)

In [238]:
processed_tr_pre.size()

torch.Size([270, 17])

In [239]:
processed_tr = pd.DataFrame(processed_tr_pre, columns = ['Z_1', 'Z_2', 'Z_3', 'Z_4', 'Z_5', 'Z_6', 'Z_7', 
                                                         'Z_8', 'Z_9', 'Z_10', 'OS.time', 'OS.event', 'age', 
                                                         'stageh', 'gradeh', 'race_black', 'race_white'])
processed_tr = processed_tr.astype(float)
processed_tes = pd.DataFrame(processed_tes_pre, columns = ['Z_1', 'Z_2', 'Z_3', 'Z_4', 'Z_5', 'Z_6', 'Z_7', 
                                                           'Z_8', 'Z_9', 'Z_10', 'OS.time', 'OS.event', 'age', 
                                                           'stageh', 'gradeh', 'race_black', 'race_white'])
processed_tes = processed_tes.astype(float)

In [240]:
print(processed_tes)

         Z_1       Z_2       Z_3       Z_4       Z_5       Z_6       Z_7  \
0  -0.300747 -0.045133 -1.574540 -0.750193  0.259187  0.877949  0.800760   
1  -0.279796  1.967330  0.549393 -2.172440  0.156980 -1.666545  0.583835   
2   0.506962  0.010425  1.180324  1.231991  0.608189 -0.098923 -1.325808   
3  -0.530668  0.145328  0.318735  0.859650  0.946244  0.272163 -1.399646   
4  -1.085055  1.625088  0.914776 -0.346729  0.324161 -0.889468  0.745793   
..       ...       ...       ...       ...       ...       ...       ...   
73 -0.046637 -0.075478  0.226700  1.838614  3.140055 -2.024279  0.820216   
74  0.376160 -0.846681 -0.426726 -0.705482 -0.553578 -0.327714 -0.848431   
75 -1.656599 -2.176720  0.085181  0.442308  1.331213 -0.056378  0.101221   
76 -0.570199  0.292508  0.586788  1.595318  0.494548  1.103488  0.132523   
77 -0.474622 -1.240323  1.478146 -0.680126 -0.407574 -0.792985 -0.481261   

         Z_8       Z_9      Z_10  OS.time  OS.event   age  stageh  gradeh  \
0   1.5989

In [19]:
import lifelines
from lifelines import CoxPHFitter

In [104]:
processed_tes['gradeh'].sum()

74.0

In [242]:
cph = CoxPHFitter()
cph.fit(processed_tes, duration_col='OS.time', event_col='OS.event')
cph.print_summary()


>>> events = df['OS.event'].astype(bool)
>>> print(df.loc[events, 'race_black'].var())
>>> print(df.loc[~events, 'race_black'].var())

A very low variance means that the column race_black completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




0,1
model,lifelines.CoxPHFitter
duration col,'OS.time'
event col,'OS.event'
baseline estimation,breslow
number of observations,78
number of events observed,47
partial log-likelihood,-152.44
time fit was run,2021-01-02 02:56:17 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,z,p,-log2(p)
Z_1,0.37,1.44,0.2,-0.02,0.76,0.98,2.13,1.84,0.07,3.93
Z_2,-0.41,0.66,0.19,-0.79,-0.04,0.46,0.97,-2.15,0.03,4.97
Z_3,0.26,1.3,0.19,-0.12,0.64,0.88,1.9,1.33,0.18,2.44
Z_4,-0.11,0.9,0.21,-0.52,0.31,0.59,1.36,-0.5,0.62,0.69
Z_5,0.57,1.77,0.24,0.11,1.04,1.11,2.82,2.41,0.02,5.97
Z_6,-0.04,0.96,0.18,-0.39,0.31,0.68,1.37,-0.21,0.84,0.26
Z_7,0.38,1.46,0.19,-0.0,0.75,1.0,2.13,1.94,0.05,4.27
Z_8,0.08,1.08,0.17,-0.26,0.41,0.77,1.51,0.45,0.66,0.61
Z_9,0.38,1.46,0.21,-0.03,0.78,0.97,2.19,1.82,0.07,3.87
Z_10,0.04,1.04,0.13,-0.21,0.29,0.81,1.33,0.3,0.77,0.38

0,1
Concordance,0.73
Partial AIC,334.88
log-likelihood ratio test,25.87 on 15 df
-log2(p) of ll-ratio test,4.66
