# Import libraries and packages

In [None]:
pip install ray torch torchvision

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ray
  Downloading ray-2.1.0-cp37-cp37m-manylinux2014_x86_64.whl (59.1 MB)
[K     |████████████████████████████████| 59.1 MB 10.3 MB/s 
Collecting virtualenv>=20.0.24
  Downloading virtualenv-20.16.7-py3-none-any.whl (8.8 MB)
[K     |████████████████████████████████| 8.8 MB 62.1 MB/s 
Collecting platformdirs<3,>=2.4
  Downloading platformdirs-2.5.4-py3-none-any.whl (14 kB)
Collecting distlib<1,>=0.3.6
  Downloading distlib-0.3.6-py2.py3-none-any.whl (468 kB)
[K     |████████████████████████████████| 468 kB 57.2 MB/s 
Installing collected packages: platformdirs, distlib, virtualenv, ray
Successfully installed distlib-0.3.6 platformdirs-2.5.4 ray-2.1.0 virtualenv-20.16.7


In [None]:
pip install bayesian-optimization

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting bayesian-optimization
  Downloading bayesian_optimization-1.3.1-py3-none-any.whl (16 kB)
Installing collected packages: bayesian-optimization
Successfully installed bayesian-optimization-1.3.1


In [None]:
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import pandas as pd
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest import ConcurrencyLimiter

  if sys.path[0] == '':


In [None]:
import os

# Restrict CUDA-aware libraries in this process to device index 0.
os.environ.update({'CUDA_VISIBLE_DEVICES': "0"})

In [None]:
# Mount Google Drive at /content/drive; the training function reads its CSV
# from a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Model

In [None]:
import tensorflow as tf

def ConvNet(config, len_classes=2, input_length=43893, num_outputs=3):
    """Build a 1-D convolutional Keras classifier from a hyperparameter dict.

    The network is one Conv1D -> BatchNorm -> ReLU block followed by a
    two-stage head and a final softmax layer. The head is built either from
    Dense layers (global pooling happens first) or from kernel-size-1 Conv1D
    layers (global pooling happens last), depending on
    ``config['fc_layer_type']``.

    Args:
        config: dict with the keys
            ``conv_block1_filters`` - filters of the first Conv1D layer,
            ``fc_layer_type``       - ``'dense'`` selects the Dense head; any
                                      other value selects the Conv1D head,
            ``pool_type``           - ``'max'`` selects global max pooling; any
                                      other value selects global average pooling,
            ``fc1_units``           - width of the first head layer,
            ``dropout_rate``        - rate used by both Dropout layers.
        len_classes: width of the second head layer. Note this is the layer
            *before* the softmax output, not the number of predicted classes.
        input_length: number of time steps/features per sample; the model
            input shape is ``(input_length, 1)``.
        num_outputs: number of units in the final softmax layer.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    def _global_pool(tensor):
        """Collapse the sequence axis using the pooling chosen in config."""
        if config['pool_type'] == 'max':
            return tf.keras.layers.GlobalMaxPooling1D()(tensor)
        return tf.keras.layers.GlobalAveragePooling1D()(tensor)

    # Named `inputs` so the builtin `input` is not shadowed.
    inputs = tf.keras.layers.Input(shape=(input_length, 1))
    x = tf.keras.layers.Conv1D(filters=config['conv_block1_filters'], kernel_size=8, strides=1)(inputs)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation('relu')(x)

    if config['fc_layer_type'] == 'dense':
        # Dense head: pool first so the Dense layers see one vector per sample.
        x = _global_pool(x)

        # Fully connected layer 1
        x = tf.keras.layers.Dense(units=config['fc1_units'])(x)
        x = tf.keras.layers.Dropout(config['dropout_rate'])(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Activation('relu')(x)

        # Fully connected layer 2
        x = tf.keras.layers.Dense(units=len_classes)(x)
        x = tf.keras.layers.Dropout(config['dropout_rate'])(x)
        x = tf.keras.layers.BatchNormalization()(x)
    else:
        # Convolutional head: kernel-size-1 convolutions act position-wise,
        # and pooling is applied afterwards.
        # Fully connected layer 1
        x = tf.keras.layers.Conv1D(filters=config['fc1_units'], kernel_size=1, strides=1)(x)
        x = tf.keras.layers.Dropout(config['dropout_rate'])(x)
        x = tf.keras.layers.BatchNormalization()(x)
        x = tf.keras.layers.Activation('relu')(x)

        # Fully connected layer 2
        x = tf.keras.layers.Conv1D(filters=len_classes, kernel_size=1, strides=1)(x)
        x = tf.keras.layers.Dropout(config['dropout_rate'])(x)
        x = tf.keras.layers.BatchNormalization()(x)

        x = _global_pool(x)

    # Both heads end in the same softmax classification layer.
    predictions = tf.keras.layers.Dense(num_outputs, activation='softmax')(x)
    print(predictions)
    model = tf.keras.Model(inputs=inputs, outputs=predictions)

    # summary() prints the table itself and returns None, so it is not wrapped
    # in print() (which would emit a stray "None" line).
    model.summary()
    print(f'Total number of layers: {len(model.layers)}')

    return model
   

## Metrics

In [None]:
from keras import backend as K
import random
from sklearn.metrics import f1_score, precision_score, recall_score


def recall_m(y_true, y_pred):
    """Weighted recall for use as a Keras metric under eager execution.

    Args:
        y_true: integer class labels for the batch (tensor or array-like).
        y_pred: per-class scores, one row per sample; the predicted class is
            the argmax along axis 1.

    Returns:
        The support-weighted recall computed by scikit-learn. Classes with no
        true samples score 1 (``zero_division=1``).
    """
    # Convert explicitly: the previous `.numpy()` calls discarded their result
    # and only served to reject inputs that were not eager tensors.
    labels = np.asarray(y_true)
    predicted = np.argmax(np.asarray(y_pred), axis=1)
    return recall_score(labels, predicted, average='weighted', zero_division=1)

def precision_m(y_true, y_pred):
    """Weighted precision for use as a Keras metric under eager execution.

    Args:
        y_true: integer class labels for the batch (tensor or array-like).
        y_pred: per-class scores, one row per sample; the predicted class is
            the argmax along axis 1.

    Returns:
        The support-weighted precision computed by scikit-learn. Classes that
        are never predicted score 1 (``zero_division=1``).
    """
    # Convert explicitly: the previous `.numpy()` calls discarded their result
    # and only served to reject inputs that were not eager tensors.
    labels = np.asarray(y_true)
    predicted = np.argmax(np.asarray(y_pred), axis=1)
    return precision_score(labels, predicted, average='weighted', zero_division=1)

def f1_m(y_true, y_pred):
    """Weighted F1 score for use as a Keras metric under eager execution.

    Args:
        y_true: integer class labels for the batch (tensor or array-like).
        y_pred: per-class scores, one row per sample; the predicted class is
            the argmax along axis 1.

    Returns:
        The support-weighted F1 score computed by scikit-learn, with
        ``zero_division=1`` for undefined per-class scores.
    """
    # Convert explicitly: the previous `.numpy()` calls discarded their result
    # and only served to reject inputs that were not eager tensors.
    labels = np.asarray(y_true)
    predicted = np.argmax(np.asarray(y_pred), axis=1)
    return f1_score(labels, predicted, average='weighted', zero_division=1)

## Training function

In [None]:
from sklearn import preprocessing

def train_mnist(config):
  """Train the ConvNet classifier once on the PFS data and report final metrics.

  Despite the name, this does not use MNIST: it reads the clinical/RNA feature
  CSV from the mounted Drive, derives a three-class label from the `PFS`
  column, makes a stratified 80/20 split, min-max scales the features and
  fits the model for 100 epochs.

  Args:
    config: hyperparameter dict. `config['lr']` is read here as the Adam
      learning rate; the whole dict is forwarded to `ConvNet`.

  Returns:
    dict with keys "loss", "val_loss", "accuracy", "val_accuracy" and
    "val_f1_m", each holding the value from the last training epoch.
  """
  from sklearn.model_selection import train_test_split

  path ='/content/drive/MyDrive/ART_Inv/CNN/Ray_Tune/Clinical_data_and_RNA_total_Features_PFS.csv'
  data_frame = pd.read_csv(path)

  # Feature columns occupy positions 28..43920 of the table.
  features = data_frame.iloc[:, 28:43921].values

  # Three PFS classes (thresholds in months, per the original notes):
  #   0 -> PFS < 3 (non-responder), 1 -> 3 <= PFS < 6, 2 -> everything else.
  # Missing PFS values compare False against both thresholds and land in 2,
  # which matches the previous row-by-row loop.
  pfs = data_frame.PFS.to_numpy()
  labels = np.select([pfs < 3, pfs < 6], [0, 1], default=2)

  XTrain, XTest, yTrain, yTest = train_test_split(
      features, labels, test_size=0.20, stratify=labels)

  # Fit the scaler on the training split only, then apply it to the held-out
  # split, so validation statistics do not leak into preprocessing.
  scaler = preprocessing.MinMaxScaler()
  XTrain = scaler.fit_transform(XTrain)
  XTest = scaler.transform(XTest)

  # Conv1D expects a trailing channel axis: (samples, length, 1).
  X_train = XTrain.reshape(XTrain.shape[0], XTrain.shape[1], 1).astype('float32')
  X_test = XTest.reshape(XTest.shape[0], XTest.shape[1], 1).astype('float32')

  # Create model
  model = ConvNet(config)
  # run_eagerly=True is required because the custom metrics convert tensors
  # to NumPy arrays and call scikit-learn.
  model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate =config['lr']),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy', f1_m, precision_m, recall_m], run_eagerly=True)
  # Start model training
  fit_result = model.fit(X_train, yTrain,
                         epochs=100,
                         validation_data=(X_test, yTest))

  # history.history maps each metric to a per-epoch list; report the final
  # epoch (index -1) rather than the first one.
  per_epoch = fit_result.history
  reported = ("loss", "val_loss", "accuracy", "val_accuracy", "val_f1_m")
  return {metric: per_epoch[metric][-1] for metric in reported}

# Bayesian Optimization

In [None]:
# Fixed hyperparameter set passed to the training function.
# 'fc_layer_type' other than 'dense' selects the Conv1D head in ConvNet;
# 'pool_type' other than 'max' selects global average pooling.
config = dict(
    conv_block1_filters=32,
    dropout_rate=0.4,
    fc1_units=64,
    fc_layer_type='convolution',
    lr=0.01,
    pool_type='average',
)

In [None]:
# Train five times with the same hyperparameters and keep every run's metrics;
# each call draws a fresh random train/validation split.
histories = [train_mnist(config) for _ in range(5)]
# `history` still refers to the most recent run, as before.
history = histories[-1]

KerasTensor(type_spec=TensorSpec(shape=(None, 3), dtype=tf.float32, name=None), name='dense/activation_2/Softmax:0', description="created by layer 'dense'")
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 43893, 1)]        0         
                                                                 
 conv1d (Conv1D)             (None, 43886, 32)         288       
                                                                 
 batch_normalization (BatchN  (None, 43886, 32)        128       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 43886, 32)         0         
                                                                 
 conv1d_1 (Conv1D)           (None, 43886, 64)         2112      
                                    