In [None]:
from Data_Loader import load_data
from Train import trainCox_nnet

import torch
import numpy as np
import pandas as pd
from torch.utils.data.dataloader import DataLoader

# Hyperparameters

In [None]:
# Tensor type passed to load_data when the arrays are built.
dtype = torch.FloatTensor

# --- Network settings ---
Hidden_Nodes = 143  # number of hidden nodes
Out_Nodes = 30  # number of hidden nodes in the last hidden layer

# --- Grid-search candidates ---
Initial_Learning_Rate = [0.01, 0.001, 0.00075]
L2_Lambda = [0.01, 0.005, 0.001]
L1_Lambda = list(L2_Lambda)  # same candidate values, independent list object

# --- Epoch budgets ---
num_epochs = 3  # for pancreas
Num_EPOCHS = 3  # for lung

# --- Sub-network setup ---
Dropout_Rate = [0.7, 0.5]

# Data preprocessing 

### Import the data 

In [None]:
from Data_extraction_lung import data_norm_df_lung, output_df_lung

# Give both lung frames a fresh 0..n-1 index so they can be concatenated column-wise.
data_norm_df_lung, output_df_lung = (
    frame.reset_index(drop=True) for frame in (data_norm_df_lung, output_df_lung)
)

In [None]:
from Data_extraction_pancreas import data_norm_df_pancreas, output_df_pancreas
# Give both pancreas frames a fresh 0..n-1 index so they can be concatenated column-wise.
data_norm_df_pancreas, output_df_pancreas = (
    frame.reset_index(drop=True) for frame in (data_norm_df_pancreas, output_df_pancreas)
)

In [None]:
# Put the lung features and the outcome columns side by side, then let
# load_data split the combined frame into the four arrays used for training.
data = pd.concat((data_norm_df_lung, output_df_lung), axis="columns")
x, ytime, yevent, age = load_data(data, dtype)

### Standardisation + Variance threshold

In [None]:
#Standardisation
# Rescale every feature column of x with a scaler fitted on the whole feature matrix.
# NOTE(review): the scaler is fitted before any cross-validation split — confirm that is acceptable.
from sklearn.preprocessing import StandardScaler
x = StandardScaler().fit_transform(x)

#Variance threshold
# Drop low-variance columns from the standardised matrix.
# NOTE(review): this runs on already-standardised columns, which should have variance ~1
# (or 0 if constant), so a 0.2 cut-off presumably only removes constant features —
# confirm whether the threshold was meant to be applied before standardisation.
from sklearn.feature_selection import VarianceThreshold
selector = VarianceThreshold(0.2)
x = selector.fit_transform(x)

In [None]:
from Data_Loader import CustomDataset

# Mini-batch size handed to trainCox_nnet.
batch_size = 32

# Bundle the preprocessed lung arrays into the project's dataset wrapper.
data2 = CustomDataset(x, ytime, yevent, age)

# Input width of the network = number of feature columns (genes) left after preprocessing.
In_Nodes = x.shape[1]
print(x.shape)

# Model

### Grid search for the optimal learning rate and L1/L2 regularisation (dropout fixed at 0.5)

In [None]:
# Best-so-far hyperparameters; 0 means "nothing selected yet".
opt_l2_loss = opt_l1_loss = opt_lr_loss = opt_do_loss = 0

# Best-so-far validation / training c-index.
opt_c_index_va = opt_c_index_tr = 0

# Best-so-far loss starts at +infinity; moved to the GPU when one is available.
opt_loss = torch.full((1,), float("inf"))
if torch.cuda.is_available():
    opt_loss = opt_loss.cuda()

In [None]:
# Exhaustive grid search over the L2 penalty, learning rate and L1 penalty.
# Dropout is held fixed at GRID_DROPOUT; the Dropout_Rate list is not searched here.
# Each combination is scored by the mean final-epoch validation c-index over the
# folds returned by trainCox_nnet, and the best combination is kept in the opt_* variables.
GRID_DROPOUT = 0.5
for l2 in L2_Lambda:
    for lr in Initial_Learning_Rate:
        for l1 in L1_Lambda:
            history_train, history_val = trainCox_nnet(
                data2, In_Nodes, Hidden_Nodes, Out_Nodes,
                lr, l2, l1, num_epochs, GRID_DROPOUT, batch_size)
            # Final-epoch validation c-index of every fold that recorded a history
            # (previously only folds 0 and 1 contributed to the score).
            final_c_indices = [fold[-1]['c_index'] for fold in history_val if len(fold) > 0]
            c_index = np.mean(final_c_indices)
            if c_index == 0:
                # A score of exactly 0 abandons the remaining L1 values for this
                # (l2, lr) pair; only the innermost loop is left.
                break
            elif c_index > opt_c_index_va:
                opt_l2_loss = l2
                opt_lr_loss = lr
                opt_l1_loss = l1
                opt_do_loss = GRID_DROPOUT
                opt_c_index_va = c_index
            # Report this run's score and the best so far; the old message printed
            # the best-so-far value under the label "c_index".
            print("L2: ", l2, "L1:", l1, "LR: ", lr,
                  "c_index", c_index, "best c_index", opt_c_index_va)

### Train the model 

In [None]:
# Re-run the cross-validated training on the lung dataset with the
# hyperparameters selected by the grid search, for Num_EPOCHS epochs.
# NOTE(review): data2 is the same dataset the grid search was scored on, so the
# resulting validation curves are not an independent test — confirm.
final_hparams = (opt_lr_loss, opt_l2_loss, opt_l1_loss, Num_EPOCHS, opt_do_loss, batch_size)
history_train, history_val = trainCox_nnet(
    data2, In_Nodes, Hidden_Nodes, Out_Nodes, *final_hparams)
print ("Optimal L2: ", opt_l2_loss, "Optimal LR: ", opt_lr_loss)

# Visualisation

### Plot the loss and the c-index

In [None]:
# Per-epoch loss and c-index curves for lung folds 0-4, one list per metric and split.

# fold 0
loss_train_fold0 = [rec['loss'] for rec in history_train[0]]
loss_val_fold0 = [rec['loss'] for rec in history_val[0]]
c_index_train_fold0 = [rec['c_index'] for rec in history_train[0]]
c_index_val_fold0 = [rec['c_index'] for rec in history_val[0]]

# fold 1
loss_train_fold1 = [rec['loss'] for rec in history_train[1]]
loss_val_fold1 = [rec['loss'] for rec in history_val[1]]
c_index_train_fold1 = [rec['c_index'] for rec in history_train[1]]
c_index_val_fold1 = [rec['c_index'] for rec in history_val[1]]

# fold 2
loss_train_fold2 = [rec['loss'] for rec in history_train[2]]
loss_val_fold2 = [rec['loss'] for rec in history_val[2]]
c_index_train_fold2 = [rec['c_index'] for rec in history_train[2]]
c_index_val_fold2 = [rec['c_index'] for rec in history_val[2]]

# fold 3
loss_train_fold3 = [rec['loss'] for rec in history_train[3]]
loss_val_fold3 = [rec['loss'] for rec in history_val[3]]
c_index_train_fold3 = [rec['c_index'] for rec in history_train[3]]
c_index_val_fold3 = [rec['c_index'] for rec in history_val[3]]

# fold 4
loss_train_fold4 = [rec['loss'] for rec in history_train[4]]
loss_val_fold4 = [rec['loss'] for rec in history_val[4]]
c_index_train_fold4 = [rec['c_index'] for rec in history_train[4]]
c_index_val_fold4 = [rec['c_index'] for rec in history_val[4]]


In [None]:
# Per-epoch loss and c-index curves for lung folds 5-9.
# Indexing history_train / history_val at 5..9 requires at least ten fold entries.

# fold 5
loss_train_fold5 = [rec['loss'] for rec in history_train[5]]
loss_val_fold5 = [rec['loss'] for rec in history_val[5]]
c_index_train_fold5 = [rec['c_index'] for rec in history_train[5]]
c_index_val_fold5 = [rec['c_index'] for rec in history_val[5]]

# fold 6
loss_train_fold6 = [rec['loss'] for rec in history_train[6]]
loss_val_fold6 = [rec['loss'] for rec in history_val[6]]
c_index_train_fold6 = [rec['c_index'] for rec in history_train[6]]
c_index_val_fold6 = [rec['c_index'] for rec in history_val[6]]

# fold 7
loss_train_fold7 = [rec['loss'] for rec in history_train[7]]
loss_val_fold7 = [rec['loss'] for rec in history_val[7]]
c_index_train_fold7 = [rec['c_index'] for rec in history_train[7]]
c_index_val_fold7 = [rec['c_index'] for rec in history_val[7]]

# fold 8
loss_train_fold8 = [rec['loss'] for rec in history_train[8]]
loss_val_fold8 = [rec['loss'] for rec in history_val[8]]
c_index_train_fold8 = [rec['c_index'] for rec in history_train[8]]
c_index_val_fold8 = [rec['c_index'] for rec in history_val[8]]

# fold 9
loss_train_fold9 = [rec['loss'] for rec in history_train[9]]
loss_val_fold9 = [rec['loss'] for rec in history_val[9]]
c_index_train_fold9 = [rec['c_index'] for rec in history_train[9]]
c_index_val_fold9 = [rec['c_index'] for rec in history_val[9]]


In [None]:
# x-axis for the lung curves: integer epochs 0 .. Num_EPOCHS inclusive.
epoch = np.arange(Num_EPOCHS + 1)

In [None]:
import matplotlib.pyplot as plt 
# Training vs. validation loss for four lung folds, one panel per fold.
# Each panel is titled with its fold index because the plotted folds (0, 1, 2, 4)
# are not contiguous and would otherwise be indistinguishable.
lung_loss_panels = [
    (0, loss_train_fold0, loss_val_fold0),
    (1, loss_train_fold1, loss_val_fold1),
    (2, loss_train_fold2, loss_val_fold2),
    (4, loss_train_fold4, loss_val_fold4),
]
plt.figure(figsize=(10, 6))
for position, (fold_id, train_curve, val_curve) in enumerate(lung_loss_panels, start=1):
    plt.subplot(2, 2, position)
    plt.plot(epoch, train_curve, label='training')
    plt.plot(epoch, val_curve, label='validation')
    plt.title('fold {}'.format(fold_id))
    plt.ylabel('loss = log likelihood')
    plt.xlabel('epochs')
    plt.legend()
# Keep titles and axis labels of neighbouring panels from overlapping.
plt.tight_layout()
plt.show()



In [None]:
# Training vs. validation c-index for four lung folds, one panel per fold.
# Each panel is titled with its fold index because the plotted folds (0, 1, 3, 4)
# are not contiguous and differ from the folds shown in the loss figure.
lung_c_index_panels = [
    (0, c_index_train_fold0, c_index_val_fold0),
    (1, c_index_train_fold1, c_index_val_fold1),
    (3, c_index_train_fold3, c_index_val_fold3),
    (4, c_index_train_fold4, c_index_val_fold4),
]
plt.figure(figsize=(10, 6))
for position, (fold_id, train_curve, val_curve) in enumerate(lung_c_index_panels, start=1):
    plt.subplot(2, 2, position)
    plt.plot(epoch, train_curve, label='training')
    plt.plot(epoch, val_curve, label='validation')
    plt.title('fold {}'.format(fold_id))
    plt.ylabel('acc= c_index')
    plt.xlabel('epochs')
    plt.legend()
# Keep titles and axis labels of neighbouring panels from overlapping.
plt.tight_layout()
plt.show()


### Summary of the results

In [None]:
# Cross-validation result over folds 0-4: mean, standard deviation and the
# individual validation c-index values at the final recorded epoch.
# The previous hard-coded epoch index 4 is out of range when each history holds
# Num_EPOCHS + 1 = 4 entries (the length of the epoch axis they are plotted against).
final_val_c_index_5fold = [
    c_index_val_fold1[-1],
    c_index_val_fold0[-1],
    c_index_val_fold3[-1],
    c_index_val_fold2[-1],
    c_index_val_fold4[-1],
]
print(np.mean(final_val_c_index_5fold))
print(np.std(final_val_c_index_5fold))
print(final_val_c_index_5fold)


In [None]:
# Cross-validation result over folds 0-2: mean, standard deviation and the
# individual validation c-index values at the final recorded epoch.
# The previous hard-coded epoch index 6 is out of range when each history holds
# Num_EPOCHS + 1 = 4 entries (the length of the epoch axis they are plotted against).
final_val_c_index_3fold = [
    c_index_val_fold0[-1],
    c_index_val_fold1[-1],
    c_index_val_fold2[-1],
]
print(np.mean(final_val_c_index_3fold))
print(np.std(final_val_c_index_3fold))
print(final_val_c_index_3fold)



# Transfer learning 

### Data pre processing

In [None]:
from Data_extraction_pancreas import data_norm_df_pancreas, output_df_pancreas
# Give both pancreas frames a fresh 0..n-1 index so they can be concatenated column-wise.
# (An earlier cell performs the same import and re-indexing.)
data_norm_df_pancreas, output_df_pancreas = (
    frame.reset_index(drop=True) for frame in (data_norm_df_pancreas, output_df_pancreas)
)



In [None]:
# Put the pancreas features and outcome columns side by side, then let
# load_data split the combined frame into the four arrays used for training.
data_pancreas = pd.concat((data_norm_df_pancreas, output_df_pancreas), axis="columns")
x_pancreas, ytime_pancreas, yevent_pancreas, age_pancreas = load_data(data_pancreas, dtype)

# Standardise the pancreas features. Unlike the lung pipeline, no variance
# threshold is applied afterwards, so every feature column is kept.
from sklearn.preprocessing import StandardScaler
pancreas_scaler = StandardScaler()
x_pancreas = pancreas_scaler.fit_transform(x_pancreas)

In [None]:
from Data_Loader import CustomDataset
# NOTE(review): batch_size is reassigned here, but the DataLoaders built for the
# pancreas cross-validation folds pass batch_size=15 explicitly — confirm which is intended.
batch_size=32
# Bundle the pancreas arrays into the project's dataset wrapper.
data2_pancreas = CustomDataset(x_pancreas, ytime_pancreas, yevent_pancreas, age_pancreas)

### Model

In [None]:
from Model import Cox_nnet



import torch
import torch.optim as optim
import copy
from scipy.interpolate import interp1d
import numpy as np
import pandas as pd
import torch.nn as nn
from sklearn.model_selection import KFold
from torch.utils.data.dataloader import DataLoader

# The input width must equal the number of feature columns fed to the model, so
# it is read from the pancreas feature matrix instead of being hard-coded (was 60660).
In_Nodes = x_pancreas.shape[1]
# Base network whose layers (all but the last) are reused by the transfer model.
# NOTE(review): `net` is freshly constructed here and no trained weights are loaded
# into it anywhere in this notebook — confirm where the pretrained lung weights
# are supposed to come from.
net = Cox_nnet(In_Nodes, Hidden_Nodes, Out_Nodes, 0.5)




In [None]:
from sksurv.metrics import concordance_index_censored
from Survival_CostFunc_CIndex import R_set, neg_par_log_likelihood, c_index

class Cox_nnet_transfer(nn.Module):
    """Transfer model: the body of an existing network plus a new linear output layer.

    Every child module of the supplied network except its last one is chained
    into ``self.trainer``. The child modules are reused as-is (not copied), so
    their parameters are shared with the supplied network. The output of that
    body is concatenated with a second input and passed through a new
    bias-free linear layer producing one value per sample.
    """

    def __init__(self, In_Nodes, Hidden_Nodes, Out_Nodes, Cox_nnet, Dropout):
        """Build the transfer model.

        Args:
            In_Nodes: accepted for signature compatibility; not used here.
            Hidden_Nodes: accepted for signature compatibility; not used here.
            Out_Nodes: width of the body's output; the new layer takes Out_Nodes + 1 inputs.
            Cox_nnet: network instance whose children (all but the last) form the body.
            Dropout: accepted for signature compatibility; not used here.
        """
        super().__init__()
        body_layers = list(Cox_nnet.children())[:-1]  # drop the final child module
        self.trainer = nn.Sequential(*body_layers)
        self.classifier = nn.Linear(Out_Nodes + 1, 1, bias=False)
        # Start the new layer with small weights in [-0.001, 0.001].
        nn.init.uniform_(self.classifier.weight, -0.001, 0.001)

    def forward(self, x_1, x_2):
        """Run x_1 through the body, append x_2 along dim 1, and apply the linear layer."""
        body_out = self.trainer(x_1)
        joined = torch.cat((body_out, x_2), 1)
        return self.classifier(joined)

    def _score_batch(self, batch):
        """Forward one 4-tuple batch and return its loss and c-index under the shared keys."""
        features, times, events, extra = batch
        risk = self(features.float(), extra)
        return {
            'val_loss': neg_par_log_likelihood(risk, times, events),
            'val_acc': c_index(risk, times, events),
        }

    @staticmethod
    def _summarise(step_outputs):
        """Average per-batch 'val_loss' / 'val_acc' tensors into plain Python floats."""
        mean_loss = torch.stack([out['val_loss'] for out in step_outputs]).mean()
        mean_acc = torch.stack([out['val_acc'] for out in step_outputs]).mean()
        return {'loss': mean_loss.item(), 'c_index': mean_acc.item()}

    def training_step(self, batch):
        """Return {'val_loss', 'val_acc'} for one training batch."""
        return self._score_batch(batch)

    def training_epoch_end(self, pred):
        """Return {'loss', 'c_index'} averaged over the per-batch training results."""
        return self._summarise(pred)

    def validation_step(self, batch):
        """Return {'val_loss', 'val_acc'} for one validation batch."""
        return self._score_batch(batch)

    def validation_epoch_end(self, pred):
        """Return {'loss', 'c_index'} averaged over the per-batch validation results."""
        return self._summarise(pred)

    def epoch_end(self, epoch, result):
        """Print a one-line summary of an epoch's loss and c-index."""
        message = "Epoch [{}], loss: {:.4f}, c_index: {:.4f}"
        print(message.format(epoch, result['loss'], result['c_index']))

In [None]:
# Build one transfer model on top of `net` with dropout argument 0.5.
# The cross-validation cell further down constructs its own instance per fold and rebinds my_model.
my_model = Cox_nnet_transfer(In_Nodes, Hidden_Nodes, Out_Nodes, net, 0.5)

### Train the model with cross validation

In [None]:
def reset_weights(m):
    """Re-initialise every direct child of ``m`` that exposes ``reset_parameters``.

    Only immediate children are visited; modules nested deeper are left untouched.
    """
    resettable = [child for child in m.children() if hasattr(child, 'reset_parameters')]
    for child in resettable:
        child.reset_parameters()


# K-fold cross-validated fine-tuning of the transfer model on the pancreas dataset.
# For every fold a fresh model is built, trained for num_epochs + 1 epochs, and its
# per-epoch training / validation summaries are stored in history_*_pancreas[fold].
dtype = torch.FloatTensor
k_folds = 5
l1_weight = 0.001  # coefficient of the L1 penalty added to the training loss
# Fixed random_state so the fold assignment is the same on every run.
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=0)
history_val_pancreas = [[] for _ in range(k_folds)]
history_train_pancreas = [[] for _ in range(k_folds)]
for fold, (train_idx, test_idx) in enumerate(kfold.split(data2_pancreas)):
    # Cox_nnet_transfer reuses the child modules it is given, so each fold gets its
    # own deep copy of `net`; otherwise every fold would keep training the same
    # weights and later folds would start from layers already fitted on their
    # validation samples.
    my_model = Cox_nnet_transfer(In_Nodes, Hidden_Nodes, Out_Nodes, copy.deepcopy(net), 0.5)
    opt = optim.Adam(my_model.parameters(), lr=opt_lr_loss, weight_decay=opt_l2_loss)
    print('------------fold no---------{}----------------------'.format(fold))
    train_loader = DataLoader(data2_pancreas, batch_size=15, sampler=train_idx)
    val_loader = DataLoader(data2_pancreas, batch_size=15, sampler=test_idx)
    for epoch in range(num_epochs + 1):
        # ---- training phase ----
        my_model.train()
        pred_train = []
        for batch in train_loader:
            step = my_model.training_step(batch)
            # L1 penalty over the parameters that are actually being optimised.
            l1_penalty = sum(param.abs().sum() for param in my_model.parameters())
            loss = step['val_loss'] + l1_weight * l1_penalty
            opt.zero_grad()  # reset gradients to zeros
            loss.backward()  # calculate gradients
            opt.step()  # update weights and biases
            # Record the metrics of this forward pass, detached so the stored
            # values do not keep the autograd graph alive.
            pred_train.append({
                'val_loss': step['val_loss'].detach(),
                'val_acc': step['val_acc'].detach(),
            })
        result_train = my_model.training_epoch_end(pred_train)
        # ---- validation phase: eval mode, no gradient tracking ----
        my_model.eval()
        with torch.no_grad():
            pred_val = [my_model.validation_step(batch) for batch in val_loader]
        result_val = my_model.validation_epoch_end(pred_val)
        my_model.epoch_end(epoch, result_val)
        history_val_pancreas[fold].append(result_val)
        history_train_pancreas[fold].append(result_train)



### Visualisation

In [None]:
# Scalar count of recorded epochs; `epoch` is rebound to an array of epoch indices
# a few cells below, before it is used as the x-axis of the pancreas plots.
epoch = num_epochs+1

In [None]:
# Per-epoch loss and c-index curves for the five pancreas folds.

# fold 0
loss_train_fold0_pancreas = [rec['loss'] for rec in history_train_pancreas[0]]
loss_val_fold0_pancreas = [rec['loss'] for rec in history_val_pancreas[0]]
c_index_train_fold0_pancreas = [rec['c_index'] for rec in history_train_pancreas[0]]
c_index_val_fold0_pancreas = [rec['c_index'] for rec in history_val_pancreas[0]]

# fold 1
loss_train_fold1_pancreas = [rec['loss'] for rec in history_train_pancreas[1]]
loss_val_fold1_pancreas = [rec['loss'] for rec in history_val_pancreas[1]]
c_index_train_fold1_pancreas = [rec['c_index'] for rec in history_train_pancreas[1]]
c_index_val_fold1_pancreas = [rec['c_index'] for rec in history_val_pancreas[1]]

# fold 2
loss_train_fold2_pancreas = [rec['loss'] for rec in history_train_pancreas[2]]
loss_val_fold2_pancreas = [rec['loss'] for rec in history_val_pancreas[2]]
c_index_train_fold2_pancreas = [rec['c_index'] for rec in history_train_pancreas[2]]
c_index_val_fold2_pancreas = [rec['c_index'] for rec in history_val_pancreas[2]]

# fold 3
loss_train_fold3_pancreas = [rec['loss'] for rec in history_train_pancreas[3]]
loss_val_fold3_pancreas = [rec['loss'] for rec in history_val_pancreas[3]]
c_index_train_fold3_pancreas = [rec['c_index'] for rec in history_train_pancreas[3]]
c_index_val_fold3_pancreas = [rec['c_index'] for rec in history_val_pancreas[3]]

# fold 4
loss_train_fold4_pancreas = [rec['loss'] for rec in history_train_pancreas[4]]
loss_val_fold4_pancreas = [rec['loss'] for rec in history_val_pancreas[4]]
c_index_train_fold4_pancreas = [rec['c_index'] for rec in history_train_pancreas[4]]
c_index_val_fold4_pancreas = [rec['c_index'] for rec in history_val_pancreas[4]]


In [None]:
# x-axis for the pancreas curves: integer epochs 0 .. num_epochs inclusive.
epoch = np.arange(num_epochs + 1)

In [None]:
import matplotlib.pyplot as plt 
# Training vs. held-out loss for four pancreas folds, one panel per fold.
# Panels keep their original order (folds 4, 1, 2, 0) and are now titled with
# the fold index so each curve can be attributed to its fold.
pancreas_loss_panels = [
    (4, loss_train_fold4_pancreas, loss_val_fold4_pancreas),
    (1, loss_train_fold1_pancreas, loss_val_fold1_pancreas),
    (2, loss_train_fold2_pancreas, loss_val_fold2_pancreas),
    (0, loss_train_fold0_pancreas, loss_val_fold0_pancreas),
]
plt.figure(figsize=(10, 6))
for position, (fold_id, train_curve, val_curve) in enumerate(pancreas_loss_panels, start=1):
    plt.subplot(2, 2, position)
    plt.plot(epoch, train_curve, label='training')
    plt.plot(epoch, val_curve, label='testing')
    plt.title('fold {}'.format(fold_id))
    plt.ylabel('loss = log likelihood')
    plt.xlabel('epochs')
    plt.legend()
# Keep titles and axis labels of neighbouring panels from overlapping.
plt.tight_layout()
plt.show()



In [None]:
# Training vs. held-out c-index for four pancreas folds, one panel per fold.
# Each panel is titled with its fold index because the plotted folds (0, 1, 3, 4)
# are not contiguous and differ from the folds shown in the loss figure.
pancreas_c_index_panels = [
    (0, c_index_train_fold0_pancreas, c_index_val_fold0_pancreas),
    (1, c_index_train_fold1_pancreas, c_index_val_fold1_pancreas),
    (3, c_index_train_fold3_pancreas, c_index_val_fold3_pancreas),
    (4, c_index_train_fold4_pancreas, c_index_val_fold4_pancreas),
]
plt.figure(figsize=(10, 6))
for position, (fold_id, train_curve, val_curve) in enumerate(pancreas_c_index_panels, start=1):
    plt.subplot(2, 2, position)
    plt.plot(epoch, train_curve, label='training')
    plt.plot(epoch, val_curve, label='testing')
    plt.title('fold {}'.format(fold_id))
    plt.ylabel('acc= c_index')
    plt.xlabel('epochs')
    plt.legend()
# Keep titles and axis labels of neighbouring panels from overlapping.
plt.tight_layout()
plt.show()


### Summary of the results

In [None]:
# Cross-validation result for the pancreas transfer model: mean and individual
# validation c-index values of the five folds at the final recorded epoch.
# The previous hard-coded epoch index 1 reported a mid-training epoch; the final
# epoch matches how the grid search scores a run.
final_val_c_index_pancreas = [
    c_index_val_fold0_pancreas[-1],
    c_index_val_fold1_pancreas[-1],
    c_index_val_fold2_pancreas[-1],
    c_index_val_fold3_pancreas[-1],
    c_index_val_fold4_pancreas[-1],
]
print(np.mean(final_val_c_index_pancreas))
print(final_val_c_index_pancreas)