In [1]:
#import required packages
#from tensorflow.python.keras import backend as K
import keras
from keras.layers import Flatten
from keras.layers import Dropout
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from tensorflow import keras
from keras import backend

from tensorflow.python.framework import ops
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from hyperopt import fmin, tpe, hp,rand, space_eval
import pandas as pd
import tensorflow
#import tensorflow as tf
import numpy as np
import os

In [2]:
# define the model-building function and the relevant metric (r2)
def r2(y_true, y_pred):
    """Coefficient of determination (R^2) expressed with Keras backend ops.

    Listed in ``metrics`` when the model is compiled in ``create_model``.

    Args:
        y_true: tensor of target values.
        y_pred: tensor of model predictions.

    Returns:
        Tensor holding ``1 - SSE / SST`` where SSE is the residual sum of
        squares and SST the total sum of squares around the mean of ``y_true``.
    """
    SSE = backend.sum(backend.square(y_pred - y_true))
    SST = backend.sum(backend.square(y_true - backend.mean(y_true)))
    # Guard the denominator: if every target passed in is identical, SST is 0
    # and the unguarded ratio evaluates to inf/NaN.
    return 1 - SSE / (SST + backend.epsilon())

def create_model(input_length, hyperparameter):
    """Build and compile a fully connected regression network.

    Args:
        input_length: number of features per sample; the model takes inputs of
            shape ``(input_length, 1)`` and flattens them first.
        hyperparameter: indexable sequence ordered as
            ``[num_dense_layers, num_dense_nodes, activation,
            learning_rate, adam_decay, drop_out]``.

    Returns:
        A compiled ``Sequential`` model: ``num_dense_layers`` Dense+Dropout
        pairs followed by a single linear output unit, trained with MSE loss
        and reporting ``mse`` and ``r2`` metrics.
    """
    # hyperopt's quniform yields floats, so both counts are coerced to int here
    # rather than relying on every caller to do it.
    num_dense_layers = int(hyperparameter[0])
    num_dense_nodes = int(hyperparameter[1])
    activation = hyperparameter[2]
    learning_rate = hyperparameter[3]
    adam_decay = hyperparameter[4]
    drop_out = hyperparameter[5]

    model = Sequential()
    model.add(Flatten(input_shape=[input_length, 1]))

    for i in range(num_dense_layers):
        name = 'layer_dense_{0}'.format(i + 1)
        model.add(Dense(num_dense_nodes, activation=activation, name=name))
        model.add(Dropout(drop_out))

    opt = Adam(learning_rate=learning_rate, decay=adam_decay)
    model.add(Dense(1, activation='linear'))
    model.compile(loss="mse", optimizer=opt, metrics=['mse', r2])
    return model

In [3]:
# define the optimization function
def fitness(parameters):
    """Hyperopt objective: 5-fold cross-validated MSE for one hyperparameter sample.

    Reads the module-level ``train_feature``, ``train_output`` and ``save_path``.
    Each call adds one row (the sampled parameters plus ``mse_train`` and
    ``r2_train``) to the module-level ``parameter_inputs`` frame and rewrites
    that frame to ``save_path``.

    Args:
        parameters: dict with the keys ``num_dense_layers``, ``num_dense_nodes``,
            ``activation``, ``learning_rate``, ``adam_decay`` and ``drop_out``.

    Returns:
        Mean squared error over the pooled out-of-fold predictions (the value
        the optimizer minimizes).
    """
    global parameter_inputs

    hyperparameter = [int(parameters["num_dense_layers"]), int(parameters["num_dense_nodes"]),
                      parameters["activation"], parameters["learning_rate"],
                      parameters["adam_decay"], parameters["drop_out"]]
    input_length = len(train_feature[0])
    kfold = KFold(n_splits=5, shuffle=True, random_state=42)
    test_tot_pred = []
    test_tot_real = []
    for train, test in kfold.split(train_feature, train_output):
        model = create_model(input_length, hyperparameter)
        # Fit the scaler on the training fold only so the held-out fold does
        # not leak into the feature scaling.
        Scaler = MinMaxScaler()
        Scaler.fit(train_feature[train])
        Trains = Scaler.transform(train_feature[train])
        Tests = Scaler.transform(train_feature[test])
        # Add the trailing axis expected by the model's Flatten input layer.
        Trains = np.reshape(Trains, (len(Trains), input_length, 1))
        Tests = np.reshape(Tests, (len(Tests), input_length, 1))
        model.fit(Trains, train_output[train], batch_size=200, epochs=50, verbose=0)
        test_tot_pred = np.append(test_tot_pred, model.predict(Tests))
        test_tot_real = np.append(test_tot_real, train_output[test])
        # Drop the model and reset backend state before the next fold is built.
        del model
        keras.backend.clear_session()
        ops.reset_default_graph()

    MSE = mean_squared_error(test_tot_real, test_tot_pred)
    # Named r2_value so the module-level `r2` metric function is not shadowed.
    r2_value = r2_score(test_tot_real, test_tot_pred)

    parameter_input = pd.DataFrame(parameters, index=[0])
    parameter_input["mse_train"] = MSE
    parameter_input["r2_train"] = r2_value
    # DataFrame.append was removed in pandas 2.0; concat keeps the same
    # "existing columns first, new columns after" ordering with sort=False.
    parameter_inputs = pd.concat([parameter_inputs, parameter_input], sort=False)
    parameter_inputs.to_csv(save_path)

    print()
    print("MSE:{:.2f}, R2:{:.2f}, HidLays:{}, NNeurons:{}".format(MSE, r2_value, hyperparameter[0], hyperparameter[1]))
    print()
    return MSE

# Hyperparameter search space handed to hyperopt's fmin.
# Learning rate and Adam decay are sampled log-uniformly between 1e-6 and 1e-2;
# layer and node counts use quantized uniform draws (steps of 1 and 2).
space = dict(
    learning_rate=hp.loguniform('learning_rate', np.log(1e-6), np.log(1e-2)),
    num_dense_layers=hp.quniform('num_dense_layers', 1, 3, 1),
    num_dense_nodes=hp.quniform('num_dense_nodes', 20, 2048, 2),
    activation=hp.choice('activation', ['relu', 'selu', 'sigmoid', 'linear']),
    adam_decay=hp.loguniform('adam_decay', np.log(1e-6), np.log(1e-2)),
    drop_out=hp.uniform('drop_out', 0, 0.5),
)

In [4]:
# Load the data set and hold out 20% of the rows; only the training part is
# used for the hyperparameter search below.
Sourcepath = "./source_data.csv"
source_data = pd.read_csv(Sourcepath)
# Fixed seed so the train/test partition is the same on every run.
train_data, test_data = train_test_split(source_data, test_size=0.2, random_state=42)
# Features are taken from column index 10 onward; columns containing any NaN are dropped.
train_input = train_data.iloc[:, 10:].dropna(axis='columns')
# The feature matrix is the same for every target, so build it once.
train_feature = train_input.values

# Decoded best hyperparameters per target (hp.choice index resolved to its value).
best_by_output = {}
for OutputIndex in ["E", "S", "A", "B"]:
    train_output = train_data[OutputIndex].values
    save_path = "./"+OutputIndex+"_"+"PaDEL_hyperparameters.csv"
    # Fresh log frame per target; fitness() appends one row per evaluation
    # and rewrites save_path each time.
    parameter_inputs = pd.DataFrame({"num_dense_layers":[],"num_dense_nodes":[]
                     ,"learning_rate":[],"activation":[]
                     ,"adam_decay":[],"mse_train":[],"r2_train":[]
                     ,"drop_out":[]})
    best = fmin(fitness, space=space, algo=tpe.suggest, max_evals=10)
    best_by_output[OutputIndex] = space_eval(space, best)

0.018671412809133554                                  
0.9686831748553386                                    
MSE:0.02, R2:0.97, HidLays:3, NNeurons:442            
0.05201525983761111                                                               
0.9127568538149531                                                                
MSE:0.05, R2:0.91, HidLays:1, NNeurons:1790                                       
0.04747410814310304                                                               
0.9203735486535314                                                                
MSE:0.05, R2:0.92, HidLays:2, NNeurons:124                                        
0.018420913630056573                                                              
0.969103327259992                                                                 
MSE:0.02, R2:0.97, HidLays:2, NNeurons:972                                        
0.019639552338093582                                                              
0.967