In [1]:
#### Uncomment to download data and trained weights
!gdown 1O10h52XJJKnQl5oj68nWHQ0bXf8_Y38V ## Train Synthetic Data
!gdown 1Wdo7ywkP6H-CUYZ7tOym7A5bOLlBMdrc ## Original Test Data
!gdown 12na5PZOb0G-APQgrvGaDSa3_Qdx4xEHY ## MLP weights
!gdown 1W-5EsxP6l7z4YHxC40wcOeigl4KqXYdm ## CNN1D_MLP weights

Downloading...
From: https://drive.google.com/uc?id=1O10h52XJJKnQl5oj68nWHQ0bXf8_Y38V
To: /content/syntetic_train.csv
100% 195k/195k [00:00<00:00, 69.9MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Wdo7ywkP6H-CUYZ7tOym7A5bOLlBMdrc
To: /content/original_test.csv
100% 23.2k/23.2k [00:00<00:00, 39.7MB/s]
Downloading...
From: https://drive.google.com/uc?id=12na5PZOb0G-APQgrvGaDSa3_Qdx4xEHY
To: /content/MLP.pth
100% 10.6k/10.6k [00:00<00:00, 23.4MB/s]
Downloading...
From: https://drive.google.com/uc?id=1W-5EsxP6l7z4YHxC40wcOeigl4KqXYdm
To: /content/MLP_CNN1d.pth
100% 9.10k/9.10k [00:00<00:00, 22.3MB/s]


# Required Libs

In [2]:
import pandas as pd
from string import ascii_letters
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import sklearn
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, balanced_accuracy_score

In [3]:
df_synthetic_train = pd.read_csv('/content/syntetic_train.csv')
df_original_test   = pd.read_csv('/content/original_test.csv')

In [4]:
# Nolmalise using MinMax scaling on target
# MinMax Scaling

min_train = df_synthetic_train['RelapseFreeSurvival (outcome)'].min()
max_train = df_synthetic_train['RelapseFreeSurvival (outcome)'].max()

min_test = df_original_test['RelapseFreeSurvival (outcome)'].min()
max_test = df_original_test['RelapseFreeSurvival (outcome)'].max()

df_synthetic_train['RelapseFreeSurvival (outcome)'] = (df_synthetic_train['RelapseFreeSurvival (outcome)'] - min_train)/(max_train - min_train)
df_original_test['RelapseFreeSurvival (outcome)']   = (df_original_test['RelapseFreeSurvival (outcome)'] - min_test)/(max_test - min_test)

# Defined Models

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class MLP_Regressor(nn.Module):
    def __init__(self, input_shape):
        super(MLP_Regressor, self).__init__()

        self.cf1 = nn.Linear(input_shape, 32)

        self.cf2 = nn.Linear(32, 16)
        self.cf3 = nn.Linear(16, 1)

        self.relu    = nn.ReLU()
        self.batchnorm1 = nn.BatchNorm1d(32)
        self.batchnorm2 = nn.BatchNorm1d(16)

        self.dropout1 = nn.Dropout(0.3)

    def forward(self, x):
        x  = self.relu(self.batchnorm1(self.cf1(x)))
        x  = self.dropout1(x)
        x  = self.relu(self.batchnorm2(self.cf2(x)))
        x  = self.cf3(x)

        return x

class CNN1D_Regressor(nn.Module):
    def __init__(self, input_shape):
        super(CNN1D_Regressor, self).__init__()

        self.cnn1 = nn.Conv1d(1, 2, 2, stride=2)
        self.cnn2 = nn.Conv1d(2, 4, 4, stride=3)

        self.cf1 = nn.Linear(input_shape, 16)
        self.cf2 = nn.Linear(8, 1)

        self.relu    = nn.ReLU()

        self.batchnorm1 = nn.BatchNorm1d(2)
        self.batchnorm2 = nn.BatchNorm1d(4)
        self.batchnorm3 = nn.BatchNorm1d(8)

        self.dropout1 = nn.Dropout(0.3)
        self.dropout2 = nn.Dropout(0.3)

    def forward(self, x):
        x = self.relu(self.cf1(x))
        x = self.dropout1(x)

        x = torch.unsqueeze(x, 1)
        x = self.batchnorm1(self.relu(self.cnn1(x)))
        x = self.dropout2(x)

        x = self.batchnorm2(self.relu(self.cnn2(x)))

        x = torch.reshape(x, (x.shape[0], x.shape[1] * x.shape[2]))

        x = self.cf2(x)

        return x

# Training Loop

In [8]:
# def MLP_RegressorLoop(model, criterion, optimizer, scheduler, epochs, X_train, y_train, X_val, y_val):
#     val_loss_his = []
#     train_loss_his = []
#     count = 1
#     MODEL = []
#     for eph in range(epochs):
#         loss_epoch_train = []
#         loss_epoch_val = []

#         model.train()

#         optimizer.zero_grad()

#         X_train, y_train = X_train.to(device), y_train.to(device)

#         output = model(X_train)

#         loss = criterion(output, y_train.unsqueeze(1))

#         loss_epoch_train.append(loss.cpu().detach().numpy())

#         loss.backward()
#         optimizer.step()

#         train_loss_his.append(np.mean(loss_epoch_train))

#         # Run the validation batches
#         model.eval()
#         with torch.no_grad():

#             X_val, y_val = X_val.to(device), y_val.to(device)

#             out_val = model(X_val)

#             loss = criterion(out_val.cpu(), y_val.cpu())

#             loss_epoch_val.append(loss.item())

#         val_loss_his.append(np.mean(loss_epoch_val))
#         scheduler.step(np.mean(loss_epoch_val))

#         print('MSE', loss.numpy())

#         if eph > 1:
#             if np.mean(loss_epoch_val) < min(val_loss_his[0:-1]):
#                 count = 0
#                 print(str(eph) + ' Val loss improve form!! :' + str(min(val_loss_his[0:-1])) + ' to ' + str(np.mean(loss_epoch_val)) )
#                 MODEL.append(model)
#                 # print('------------------Save Model!----------------')

#             if np.mean(loss_epoch_val) > min(val_loss_his[0:-1]):
#                 # print('Val loss is not improve form ' + str(min(val_loss_his))  )
#                 count += 1
#                 if count == 10:
#                     print('Stop Training')

#                     return MODEL[-1]

#                     break

In [9]:
# Set train test validation, exclude label column
dftrain = df_synthetic_train[df_synthetic_train.columns[1:]].values.tolist()[40:]
dfval   = df_synthetic_train[df_synthetic_train.columns[1:]].values.tolist()[0:40]
dftest  = df_original_test[df_original_test.columns[1:]].values.tolist()

y_train = df_synthetic_train['RelapseFreeSurvival (outcome)'].reset_index(drop=True)[40:]
y_val   = df_synthetic_train['RelapseFreeSurvival (outcome)'].reset_index(drop=True)[0:40]
y_test  = df_original_test['RelapseFreeSurvival (outcome)'].reset_index(drop=True)

# To pytorch tensor
X_train = torch.tensor(dftrain, dtype=torch.float32)
X_val   = torch.tensor(dfval, dtype=torch.float32)
X_test  = torch.tensor(dftest, dtype=torch.float32)

y_train = torch.tensor(y_train.tolist(), dtype=torch.float32)
y_val   = torch.tensor(y_val.tolist(), dtype=torch.float32)
y_test  = torch.tensor(y_test.tolist(), dtype=torch.float32)

In [10]:
# Uncomment if you want to train
# epochs = 200
# # model = CNN1D_Regressor(X_train.shape[-1]) # Create model CNN1D
# model = MLP_Regressor(X_train.shape[-1]) # Create model CNN1D

# criterion = nn.MSELoss() # nn.L1Loss()# nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr= 0.01)
# scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.6,
#                                      patience=5, min_lr= 0.00001 ,verbose = True)

# model_trained = MLP_RegressorLoop(model, criterion, optimizer, scheduler, epochs,
#                                     X_train, y_train, X_val, y_val)

In [11]:
# Save Weights
# torch.save(model_trained.state_dict(), "/content/MLP_CNN1d.pth") # CNN1D_MLP
# torch.save(model_trained.state_dict(), "/content/MLP.pth") # MLP

In [12]:
# Create Models
# model_trained = CNN1D_Regressor(X_train.shape[-1]) # Create CNN1D_MLP
model_trained = MLP_Regressor(X_train.shape[-1]) # Create MLP

In [13]:
# Load Weights
# model_trained.load_state_dict(torch.load("/content/MLP_CNN1d.pth")) # CNN1D_MLP
model_trained.load_state_dict(torch.load("/content/MLP.pth")) # MLP

<All keys matched successfully>

In [14]:
# Predict on test set
from sklearn.metrics import r2_score

model_trained.eval()
with torch.no_grad():
  prd = model_trained(X_test)

print('R2 ', r2_score(y_test, prd))
print('MAE ', mean_absolute_error(y_test, prd))

R2  -0.009025655389458986
MAE  0.15294528
