In [1]:
import numpy
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="1"
from src.models.conv_model import build_model as build_conv_model
from src.data_loader.RNASeqStructLoader import RNASeqStructDataGenerator 
from src.models.conv_model import correlation_coefficient_loss, pearson_r

Using TensorFlow backend.


In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Conv1D, Dropout, Flatten, BatchNormalization, MaxPool1D, Activation
from tensorflow.keras.optimizers import RMSprop
import tensorflow as tf


In [3]:
from hyperopt import STATUS_OK
import numpy
from src.models.conv_model import build_model as build_conv_model
import tensorflow as tf
from src.evaluator.evaluator import Evaluator
from src.models.conv_model import correlation_coefficient_loss, pearson_r
import math
import tensorflow as tf
from time import time
from tqdm.keras import TqdmCallback
import sys
from tensorflow.keras.callbacks import CSVLogger, ModelCheckpoint

# Name -> callable lookup for the project's custom loss and metric.
# NOTE(review): not referenced anywhere in the visible cells; presumably meant
# as `custom_objects` when reloading a saved checkpoint -- confirm before use.
dependencies = dict(
    correlation_coefficient_loss=correlation_coefficient_loss,
    pearson_r=pearson_r,
)

def _build_trial_model(params):
    """Assemble and compile the Sequential network described by ``params``.

    Reads the keys ``conv_layers_1``, ``kernel_size_1``, ``filters_1``,
    ``pool1``, ``conv_layers_2``, ``kernel_size_2``, ``filters_2``, ``pool2``,
    ``dense_layers``, ``dense_layer_nodes`` (all cast to ``int``), plus
    ``dropout`` and ``lr``. ``lr`` is a base-10 exponent: the optimizer's
    learning rate is ``10 ** lr``.
    """
    conv_layers_1 = int(params['conv_layers_1'])
    kernel_size_1 = int(params['kernel_size_1'])
    filters_1 = int(params['filters_1'])
    pool1 = int(params['pool1'])
    conv_layers_2 = int(params['conv_layers_2'])
    kernel_size_2 = int(params['kernel_size_2'])
    filters_2 = int(params['filters_2'])
    pool2 = int(params['pool2'])
    dense_layers = int(params['dense_layers'])
    dense_layer_nodes = int(params['dense_layer_nodes'])
    dropout = params['dropout']

    model = Sequential()
    model.add(Input(shape=(101, 7)))

    # First convolutional stack (no batch normalisation is applied).
    for _ in range(conv_layers_1):
        model.add(Conv1D(filters=filters_1, kernel_size=kernel_size_1, padding='same'))
        model.add(Activation("relu"))

    # Strided convolution used for down-sampling in place of a pooling layer.
    # NOTE(review): this layer uses kernel_size_2 together with filters_1 --
    # confirm that mixing the two stacks' settings is intended.
    model.add(Conv1D(filters=filters_1, kernel_size=kernel_size_2, strides=pool1, padding='same'))

    # Second convolutional stack.
    for _ in range(conv_layers_2):
        model.add(Conv1D(filters=filters_2, kernel_size=kernel_size_2, padding='same'))
        model.add(Activation("relu"))

    model.add(Conv1D(filters=filters_2, kernel_size=kernel_size_2, strides=pool2, padding='same'))

    model.add(Flatten())

    for _ in range(dense_layers):
        model.add(Dense(dense_layer_nodes, activation='relu'))
        model.add(Dropout(dropout))

    model.add(Dense(1, activation='sigmoid'))

    optimizer = tf.keras.optimizers.RMSprop(lr=math.pow(10, params['lr']))
    model.compile(loss='binary_crossentropy', optimizer=optimizer,
                  metrics=['binary_crossentropy', 'mse', pearson_r])
    return model


def objective(params):
    """Train one model for a hyperopt trial and return its best validation loss.

    Args:
        params: mapping of sampled hyper-parameters (see ``_build_trial_model``
            for the keys that are read).

    Returns:
        The minimum ``val_loss`` recorded over the training epochs.

    Side effects:
        * prints ``params`` to the current stdout;
        * appends per-epoch metrics to ``log.csv``;
        * writes the best checkpoint to
          ``models/cDNA-ABE/logs/model_ckpt/<run_id>.h5`` and the model summary
          plus training output to ``models/cDNA-ABE/logs/<run_id>.log``. Both
          files share one timestamp so they can be matched up afterwards.
    """
    # Function-scope import so this cell does not rely on another cell's imports.
    from contextlib import redirect_stdout

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # NOTE(review): this session is never registered with Keras in this
    # function; it only carries the allow_growth GPU option.
    sess = tf.Session(config=config)

    try:
        print(params)
        model = _build_trial_model(params)

        run_id = time()
        checkpoint_filepath = 'models/cDNA-ABE/logs/model_ckpt/{}.h5'.format(run_id)

        csv_logger = CSVLogger('log.csv', append=True, separator=';')
        model_ckpt = ModelCheckpoint(checkpoint_filepath, monitor='val_loss',
                                     verbose=0, save_best_only=True)

        train_generator = RNASeqStructDataGenerator("data/processed/cDNA-ABE/train_data.hdf5", 1024)
        validation_generator = RNASeqStructDataGenerator("data/processed/cDNA-ABE/validation_data.hdf5", 256)

        # redirect_stdout puts back whatever stream was active before (under
        # Jupyter that is not sys.__stdout__) and, together with the `with`
        # on the file, guarantees restore + close even if training raises.
        with open("models/cDNA-ABE/logs/{}.log".format(run_id), "a") as log, redirect_stdout(log):
            # summary() prints by itself; wrapping it in print() only added "None".
            model.summary()
            history = model.fit(x=train_generator, epochs=6,
                                validation_data=validation_generator,
                                callbacks=[model_ckpt, csv_logger],
                                use_multiprocessing=True, workers=5, verbose=2)

        del model
        return min(history.history['val_loss'])
    finally:
        # Release the per-trial session and start the next trial from an empty
        # graph, whether or not training succeeded.
        sess.close()
        tf.reset_default_graph()

In [4]:

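# Reference copy of the hyperopt search space, left commented out.
# run_trials() unpickles the live space from
# models/cDNA-ABE/bayesian_opt/space.p; presumably that file was generated
# from this definition -- TODO confirm. `hp` is only imported in a later cell,
# so that import is needed first if this block is ever re-enabled here.
#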
# space = {
#     'lr': hp.uniform('lr', -4.5, -2),
#     'dropout': hp.uniform('dropout', 0.05, 0.3),
#     'conv_layers_1': hp.quniform('conv_layers_1', 2, 5, 1),
#     'kernel_size_1': hp.quniform('kernel_size_1', 4, 64, 1), 
#     'filters_1': hp.quniform('filters_1', 32, 192, 1), 
#     'kernel_size_2': hp.quniform('kernel_size_2', 4, 64, 1),
#     'filters_2': hp.quniform('filters_2', 16, 96, 1),
#     'conv_layers_2': hp.quniform('conv_layers_2', 2, 5, 1),
#     'dense_layers': hp.quniform('dense_layers', 0, 3, 1),
#     'dense_layer_nodes': hp.quniform('dense_layer_nodes', 16, 64, 1),
#     'pool1': hp.quniform('pool1', 1, 3, 1),
#     'pool2': hp.quniform('pool2', 1, 3, 1)
# }

In [5]:
from hyperopt import tpe, hp, fmin
import numpy as np
def test_objective(params):
    return math.pow(float(params['x']),2)

In [6]:
from hyperopt import Trials
import pickle
import os
from hyperopt import trials_from_docs
def regenerateAcc():
    """Merge every saved trial pickle into a single accumulator pickle.

    Loads each ``*.p`` file found in ``models/cDNA-ABE/bayesian_opt/trials``,
    collects every trial document not already present in the accumulator
    (compared by equality), and writes the merged ``Trials`` object to
    ``models/cDNA-ABE/bayesian_opt/accumulator.p``.

    The accumulator is written to a temporary file and then moved into place,
    so an interrupted run cannot leave a truncated ``accumulator.p`` behind.

    Raises:
        OSError: if the trials directory or the output location is not accessible.
    """
    trials_dir = "models/cDNA-ABE/bayesian_opt/trials"
    accumulator_path = "models/cDNA-ABE/bayesian_opt/accumulator.p"

    trs = []
    for file in os.listdir(trials_dir):
        if file.endswith(".p"):
            # SECURITY: pickle.load can execute arbitrary code; only files
            # written by this project should ever be placed in trials_dir.
            with open(os.path.join(trials_dir, file), "rb") as handle:
                trs.append(pickle.load(handle))

    accumulator = Trials()
    for trial in trs:
        for a in list(trial):
            if a not in accumulator:
                # New documents are prepended, as before, and the Trials
                # object is rebuilt from the combined document list.
                accumulator = trials_from_docs([a] + list(accumulator))

    tmp_path = accumulator_path + ".tmp"
    with open(tmp_path, "wb") as handle:
        pickle.dump(accumulator, handle)
    os.replace(tmp_path, accumulator_path)

In [7]:
import pickle
from hyperopt import tpe, hp, fmin
from hyperopt import Trials
def run_trials(n):
    """Resume the saved hyperopt search for one more evaluation and persist it.

    Loads the accumulated ``Trials`` object and the search space from
    ``models/cDNA-ABE/bayesian_opt/``, runs ``fmin`` on ``objective`` until
    one additional trial has been evaluated, pickles the updated trials to
    ``trials/trial-<n>.p`` and rebuilds the accumulator via ``regenerateAcc``.

    Args:
        n: identifier used in the output file name ``trial-<n>.p``; an existing
            file with the same ``n`` is overwritten.

    Returns:
        None. If the saved state cannot be loaded, a message is printed and
        the function returns without running any trial.
    """
    trials_step = 1  # number of additional trials per call (1 = save after every trial)

    try:
        # SECURITY: pickle.load can execute arbitrary code; these files must
        # only ever be produced by this project.
        with open("models/cDNA-ABE/bayesian_opt/accumulator.p", "rb") as f:
            trials = pickle.load(f)
        print("Found saved Trials! Loading...")
        with open("models/cDNA-ABE/bayesian_opt/space.p", "rb") as f:
            space = pickle.load(f)
        print("Found saved Search Space! Loading...")
        max_trials = len(trials.trials) + trials_step
        print("Rerunning from {} trials to {} (+{}) trials".format(len(trials.trials), max_trials, trials_step))
    except Exception as err:
        # Catch Exception rather than everything so that KeyboardInterrupt and
        # SystemExit still stop the caller, and report what actually failed.
        print("ISSUE WITH LOADING SAVED TRIALS")
        print("Cause: {!r}".format(err))
        return

    best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=max_trials, trials=trials)

    print("Best:", best)

    # save the trials object
    with open("models/cDNA-ABE/bayesian_opt/trials/trial-{}.p".format(n), "wb") as f:
        pickle.dump(trials, f)
    regenerateAcc()

In [8]:
# Rebuild accumulator.p from the pickles in the trials directory so the
# search below resumes from every trial saved so far.
regenerateAcc()

In [None]:
import os
# Run the search indefinitely, one additional trial per iteration; stop it by
# interrupting the kernel. Every iteration passes n=1, so each call writes to
# the same trials/trial-1.p file.
# NOTE(review): when run_trials cannot load its saved state it prints a
# message and returns, so this loop would then spin without doing any work.
while True:
    run_trials(1)

Found saved Trials! Loading...
Found saved Search Space! Loading...
Rerunning from 106 trials to 107 (+1) trials
{'conv_layers_1': 5.0, 'conv_layers_2': 5.0, 'dense_layer_nodes': 53.0, 'dense_layers': 1.0, 'dropout': 0.07239445191120981, 'filters_1': 183.0, 'filters_2': 89.0, 'kernel_size_1': 4.0, 'kernel_size_2': 26.0, 'lr': -4.064634750691902, 'pool1': 1.0, 'pool2': 3.0}
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
