# Import libraries and packages

In [48]:
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import pandas as pd
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest import ConcurrencyLimiter

In [50]:
from google.colab import drive
# Mount Google Drive at /content/drive (Colab only); the training function
# below reads its CSV from a path under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Model

In [51]:
import tensorflow as tf

def _global_pool(x, pool_type):
    """Collapse the sequence axis: global max pooling if ``pool_type == 'max'``, else global average."""
    if pool_type == 'max':
        return tf.keras.layers.GlobalMaxPooling1D()(x)
    return tf.keras.layers.GlobalAveragePooling1D()(x)


def ConvNet(config, len_classes=2, input_length=43893):
    """Build (but do not compile) a 1-D convolutional binary classifier.

    The network is one Conv1D/BatchNorm/ReLU block followed by a two-layer
    head and a single sigmoid output unit. The head is built either from
    Dense layers applied after global pooling, or from kernel-size-1 Conv1D
    layers applied before global pooling.

    Args:
        config: Mapping with the keys read below:
            ``conv_block1_filters`` (int)  filters of the first Conv1D,
            ``fc_layer_type`` (str)        ``'dense'`` selects the Dense head;
                                           any other value selects the Conv1D head,
            ``pool_type`` (str)            ``'max'`` selects max pooling;
                                           any other value selects average pooling,
            ``fc1_units`` (int)            width of the first head layer,
            ``dropout_rate`` (float)       rate used by every Dropout layer.
        len_classes: Width of the second head layer. Note that this is NOT the
            output size: the model always ends in one sigmoid unit.
        input_length: Number of time steps / features per sample; the model
            input has shape ``(input_length, 1)``. Defaults to 43893.

    Returns:
        An uncompiled ``tf.keras.Model``. The model summary and the layer
        count are printed as a side effect.
    """
    # Named `inputs` rather than `input` so the builtin is not shadowed.
    inputs = tf.keras.layers.Input(shape=(input_length, 1))

    # Convolutional block
    x = tf.keras.layers.Conv1D(filters=config['conv_block1_filters'], kernel_size=8, strides=1)(inputs)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation('relu')(x)

    if config['fc_layer_type'] == 'dense':
        # Dense head: pool first, then fully connected layers.
        x = _global_pool(x, config['pool_type'])

        # Fully connected layer 1
        x = tf.keras.layers.Dense(units=config['fc1_units'])(x)
        x = tf.keras.layers.Dropout(config['dropout_rate'])(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Activation('relu')(x)

        # Fully connected layer 2
        x = tf.keras.layers.Dense(units=len_classes)(x)
        x = tf.keras.layers.Dropout(config['dropout_rate'])(x)
        x = tf.keras.layers.BatchNormalization()(x)
    else:
        # Convolutional head: kernel-size-1 convolutions act per position,
        # and pooling happens only after them.

        # Fully connected layer 1
        x = tf.keras.layers.Conv1D(filters=config['fc1_units'], kernel_size=1, strides=1)(x)
        x = tf.keras.layers.Dropout(config['dropout_rate'])(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Activation('relu')(x)

        # Fully connected layer 2
        x = tf.keras.layers.Conv1D(filters=len_classes, kernel_size=1, strides=1)(x)
        x = tf.keras.layers.Dropout(config['dropout_rate'])(x)
        x = tf.keras.layers.BatchNormalization()(x)

        x = _global_pool(x, config['pool_type'])

    # Single-unit sigmoid output; the activation is given by name instead of
    # passing an Activation layer instance as the `activation` argument.
    predictions = tf.keras.layers.Dense(1, activation='sigmoid')(x)

    model = tf.keras.Model(inputs=inputs, outputs=predictions)

    # summary() prints the table itself and returns None, so it is not wrapped in print().
    model.summary()
    print(f'Total number of layers: {len(model.layers)}')

    return model
   

## Metrics

In [52]:
from keras import backend as K
import random


def recall_m(y_true, y_pred):
    """Recall of the given tensors: true positives / actual positives.

    Both counts are taken after clipping to [0, 1] and rounding, so a
    prediction contributes as a positive once it rounds to 1. ``K.epsilon()``
    is added to the denominator so an input without positives does not divide
    by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_mask = K.round(K.clip(y_true, 0, 1))
    return K.sum(hit_mask) / (K.sum(actual_mask) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision of the given tensors: true positives / predicted positives.

    Both counts are taken after clipping to [0, 1] and rounding.
    ``K.epsilon()`` is added to the denominator so an input without any
    predicted positive does not divide by zero.
    """
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hit_mask) / (K.sum(predicted_mask) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m`` on the same tensors.

    ``K.epsilon()`` in the denominator keeps the result finite when both
    precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

## Training function

In [53]:
from sklearn import preprocessing

def train_mnist(config):
  """Train one ConvNet on the PFS dataset and return its final-epoch metrics.

  Despite the name, no MNIST data is involved: the function reads the
  clinical/RNA feature CSV from Google Drive, labels each row by its PFS
  value, trains for 100 epochs and reports loss/accuracy.

  Args:
    config: Hyperparameter mapping. It is forwarded to ``ConvNet`` and must
      also contain ``'lr'`` (RMSprop learning rate). An optional ``'seed'``
      entry fixes the train/held-out split; when absent the split is random,
      as before.

  Returns:
    dict with keys ``"loss"``, ``"val_loss"``, ``"accuracy"`` and
    ``"val_accuracy"``, each holding the value from the LAST training epoch.
  """
  from sklearn.model_selection import train_test_split

  path = '/content/drive/MyDrive/ART_Inv/CNN/Ray_Tune/Clinical_data_and_RNA_total_Features_PFS.csv'
  data_frame = pd.read_csv(path)

  # Feature columns are selected by position; their count is the input
  # length the model is built for (43921 - 28 = 43893).
  feature_start, feature_stop = 28, 43921
  n_features = feature_stop - feature_start
  features = data_frame.iloc[:, feature_start:feature_stop].to_numpy()

  # Label 0 = NonResponder (PFS below 3 months), 1 = Responder otherwise.
  # Expressed as "not (PFS < 3)" so a missing PFS still maps to 1, exactly
  # as the previous row-by-row loop did.
  labels = (~(data_frame['PFS'] < 3)).astype(int).to_numpy()

  # Stratified 90/10 split. The 10% part is used as validation data below.
  XTrain, XTest, yTrain, yTest = train_test_split(
      features, labels,
      test_size=0.10,
      stratify=labels,
      random_state=config.get('seed'))

  # Fit the scaler on the training rows only, then apply it to the held-out
  # rows. Fitting on the full dataset before splitting leaks held-out
  # min/max statistics into training. Held-out values may therefore fall
  # slightly outside [0, 1].
  scaler = preprocessing.MinMaxScaler()
  XTrain = scaler.fit_transform(XTrain)
  XTest = scaler.transform(XTest)

  # Conv1D expects (samples, steps, channels); add a single channel axis.
  X_train = XTrain.reshape(XTrain.shape[0], n_features, 1).astype('float32')
  X_test = XTest.reshape(XTest.shape[0], n_features, 1).astype('float32')

  # Create model
  model = ConvNet(config)

  # Compile model with losses and metrics
  model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=config['lr']),
                loss='binary_crossentropy',
                metrics=['accuracy', f1_m, precision_m, recall_m])

  # Start model training
  fit_result = model.fit(X_train, yTrain,
                         epochs=100,
                         validation_data=(X_test, yTest))

  # Report the metrics of the final epoch (index -1). Index 0 would be the
  # state after the very first epoch, not the trained model.
  history_m = {
      key: fit_result.history[key][-1]
      for key in ("loss", "val_loss", "accuracy", "val_accuracy")
  }
  return history_m

In [60]:
# Fixed hyperparameters for a single training configuration.
config = dict(
    conv_block1_filters=8,    # filters of the first Conv1D layer
    dropout_rate=0.3,         # rate shared by every Dropout layer
    fc1_units=16,             # width of the first head layer
    fc_layer_type='dense',    # 'dense' head; any other value selects the Conv1D head
    lr=0.01,                  # RMSprop learning rate
    pool_type='average',      # 'max' or (anything else) average global pooling
)

In [62]:
# Train the same configuration five times. Every run's metrics are kept in
# `histories`; previously each iteration overwrote `history`, so only the
# last run's result survived. `history` still ends up holding the last run.
histories = []
for i in range(5):
  history = train_mnist(config)
  histories.append(history)

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 43893, 1)]        0         
                                                                 
 conv1d_2 (Conv1D)           (None, 43886, 8)          72        
                                                                 
 batch_normalization_6 (Batc  (None, 43886, 8)         32        
 hNormalization)                                                 
                                                                 
 activation_6 (Activation)   (None, 43886, 8)          0         
                                                                 
 global_average_pooling1d_2   (None, 8)                0         
 (GlobalAveragePooling1D)                                        
                                                                 
 dense_6 (Dense)             (None, 16)                144 