# Packages

In [1]:
%matplotlib inline
%load_ext tensorboard
!rm -rf ./logs/ 

import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from itertools import product
from scipy import stats
from sklearn.svm import SVC
from sklearn.metrics import auc, roc_curve
from sklearn.metrics import confusion_matrix
from sklearn.feature_selection import SelectPercentile, SelectKBest
from sklearn.feature_selection import chi2, mutual_info_classif
from sklearn.preprocessing import normalize
from tensorboard.plugins.hparams import api as hp
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import VarianceThreshold
from rdkit import Chem
from rdkit import DataStructs
from rdkit.Chem import AllChem, MACCSkeys
from mordred import Calculator, descriptors

# Notebook-wide plotting defaults and the folder holding the CYP datasets.
plt.style.use("ggplot")
matplotlib.rcParams['font.size'] = 24
PATH_DATA = "../datasets/CYP/"
# Record the TensorFlow version the notebook was executed with.
print(tf.__version__)

2.0.0


## L model DNN

Generate DNN with L layers

In [2]:
def generate_model(layers_dim, lr, dropout, optimizer, L2):
    """Build and compile a fully connected binary classifier.

    The network is: a relu Dense layer of width ``layers_dim[0]`` (which also
    fixes the input shape), then one ``Dropout`` + relu ``Dense`` pair per
    hidden width in ``layers_dim[1:-1]``, then a sigmoid ``Dense`` layer of
    width ``layers_dim[-1]``.

    Parameters
    ----------
    layers_dim : sequence of int
        ``[n_input, n_hid_1, ..., n_output=1]``.
    lr : float
        Learning rate. Used when ``optimizer`` is given by name. Ignored when
        an optimizer instance is passed, since that instance already carries
        its own learning rate.
    dropout : float
        Dropout rate applied before every hidden Dense layer.
    optimizer : str or optimizer instance
        Keras optimizer name (e.g. ``'RMSprop'``) or a ready-made optimizer.
    L2 : float
        L2 kernel-regularisation factor of the hidden Dense layers.

    Returns
    -------
    The compiled ``tf.keras`` Sequential model (binary cross-entropy loss;
    accuracy, precision and recall metrics).
    """
    if isinstance(optimizer, str):
        # Passing the bare name to compile() would silently drop `lr` and
        # train with the optimizer's default learning rate.
        optimizer = tf.keras.optimizers.get(
            {"class_name": optimizer, "config": {"learning_rate": lr}}
        )

    hidden_layers = []
    for units in layers_dim[1:-1]:
        hidden_layers.append(tf.keras.layers.Dropout(dropout))
        hidden_layers.append(
            tf.keras.layers.Dense(
                units,
                activation="relu",
                kernel_regularizer=tf.keras.regularizers.l2(L2),
            )
        )

    model = tf.keras.models.Sequential(
        [tf.keras.layers.Dense(layers_dim[0], activation='relu', input_shape=(layers_dim[0],))]
        + hidden_layers
        + [tf.keras.layers.Dense(layers_dim[-1], activation="sigmoid")]
    )
    loss_function = tf.keras.losses.BinaryCrossentropy()
    model.compile(
        optimizer=optimizer,
        loss=loss_function,
        metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()],
    )
    return model

Callbacks let us hook into the training loop; here one is used to stop training early once an accuracy threshold is reached.

In [None]:
class myCallback(tf.keras.callbacks.Callback):
    """Stop training as soon as an accuracy threshold is exceeded.

    Parameters
    ----------
    train_acc_threshold : float
        Training stops once the ``accuracy`` metric exceeds this value.
    val_acc_threshold : float
        Training stops once the ``val_accuracy`` metric exceeds this value.
    """

    def __init__(self, train_acc_threshold=0.90, val_acc_threshold=0.80):
        super().__init__()
        self.train_acc_threshold = train_acc_threshold
        self.val_acc_threshold = val_acc_threshold

    def on_epoch_end(self, epoch, logs=None):
        """Check both accuracies at the end of an epoch and request a stop if needed."""
        logs = logs or {}
        # A metric is absent (None) when it is not being tracked, e.g.
        # `val_accuracy` when fit() is called without validation data.
        train_acc = logs.get('accuracy')
        if train_acc is not None and train_acc > self.train_acc_threshold:
            print(f"\n Reached {self.train_acc_threshold:.0%} accuracy on training set so cancelling training!")
            self.model.stop_training = True
        val_acc = logs.get('val_accuracy')
        if val_acc is not None and val_acc > self.val_acc_threshold:
            print(f"\n Reached {self.val_acc_threshold:.0%} accuracy on validation set so cancelling training!")
            self.model.stop_training = True

callbacks = myCallback()

Example of using DNN of L layers with callbacks

In [None]:
# Hyperparameters of the example network: two hidden layers (15 and 5 units).
# NOTE(review): `train_data`, `train_labels`, `test_data` and `test_labels` are
# not created in the visible part of the notebook — confirm they exist before
# this cell runs.
layers_dim = [train_data.shape[1], 15, 5, 1]
lr = 0.001
dropout = 0.2
optimizer = 'RMSprop'
L2 = 0.001

model_small_dataset = generate_model(layers_dim, lr, dropout, optimizer, L2)

history = model_small_dataset.fit(
    train_data, train_labels,
    epochs=50,
    verbose=2,
    validation_data=(test_data, test_labels),
    # The early-stopping callback was instantiated but never handed to fit().
    callbacks=[callbacks],
)

# Accuracy per epoch on the training data and on the held-out data.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'])
ax.plot(history.history['val_accuracy'])
ax.set_title('Model accuracy')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.legend(['Train', 'Test'], loc='upper left')
plt.show()

## Loading data

In [None]:
PATH_DATA = "../datasets/CYP/"

# Three tables: compounds screened against both isoforms, and compounds
# screened against only CYP2C9 or only CYP3A4.
shared_data = pd.read_csv(os.path.join(PATH_DATA, "shared_set_cyp.csv"))
testing_2c9_data = pd.read_csv(os.path.join(PATH_DATA, "only_2c9_set_cyp.csv"))
testing_3a4_data = pd.read_csv(os.path.join(PATH_DATA, "only_3a4_set_cyp.csv"))

# Binary targets: 1 where the outcome column is exactly "Active", 0 otherwise.
labels_2c9 = shared_data["p450-cyp2c9 Activity Outcome"].eq("Active").to_numpy().astype(int)
labels_3a4 = shared_data["p450-cyp3a4 Activity Outcome"].eq("Active").to_numpy().astype(int)
labels_testing_2c9 = testing_2c9_data["p450-cyp2c9 Activity Outcome"].eq("Active").to_numpy().astype(int)
labels_testing_3a4 = testing_3a4_data["p450-cyp3a4 Activity Outcome"].eq("Active").to_numpy().astype(int)

#features_shared = np.load(os.path.join("features", "shared_set_features.npy"))
#features_only_2c9 = np.load(os.path.join("features", "only_2c9_set_features.npy"))
#features_only_3a4 = np.load(os.path.join("features", "only_3a4_set_features.npy"))

## Getting features (fingerprints)

There are many [fingerprints](http://rdkit.org/UGM/2012/Landrum_RDKit_UGM.Fingerprints.Final.pptx.pdf):

- Morgan
- MACCS
- RDKit fingerprint (to do)

**TODO (Dani):** add a link to the paper that compares fingerprints.

#### Morgan

In [None]:
def get_features(input_sdf):
    """Compute Morgan fingerprints (radius 2, 1024 bits) for an SDF file.

    Parameters
    ----------
    input_sdf : str
        Path to the SDF file to read.

    Returns
    -------
    numpy.ndarray
        One int8 row per molecule, in file order.

    Raises
    ------
    ValueError
        If RDKit cannot parse a record. The record is not skipped because
        that would change the number of rows; the feature rows are presumably
        matched by position with labels read elsewhere — TODO confirm.
    """
    supplier = Chem.SDMolSupplier(input_sdf)
    features = []
    for position, mol in enumerate(supplier):
        if mol is None:
            # SDMolSupplier yields None for records it cannot parse.
            raise ValueError(f"RDKit could not parse record {position} of {input_sdf}")
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=1024)
        arr = np.zeros((0,), dtype=np.int8)
        # ConvertToNumpyArray resizes `arr` in place to the fingerprint length.
        DataStructs.ConvertToNumpyArray(fp, arr)
        features.append(arr)
    return np.array(features)

# Load cached Morgan fingerprints, computing and caching them on first use.
# np.save does not create missing folders, so make sure the cache folder exists.
os.makedirs("features", exist_ok=True)
features_morgan = {}
for set_name in ("shared", "only_2c9", "only_3a4"):
    cache_path = os.path.join("features", f"{set_name}_set_features.npy")
    if os.path.exists(cache_path):
        features_morgan[set_name] = np.load(cache_path)
    else:
        features_morgan[set_name] = get_features(os.path.join(PATH_DATA, f"{set_name}_set_cyp.sdf"))
        np.save(cache_path, features_morgan[set_name])

# Same three variables as the MACCS and RDKit cells define.
features_shared = features_morgan["shared"]
features_only_2c9 = features_morgan["only_2c9"]
features_only_3a4 = features_morgan["only_3a4"]

#### MACCS

In [None]:
def get_features(input_sdf):
    """Compute MACCS-keys fingerprints for every molecule of an SDF file.

    Parameters
    ----------
    input_sdf : str
        Path to the SDF file to read.

    Returns
    -------
    numpy.ndarray
        One int8 row per molecule, in file order.

    Raises
    ------
    ValueError
        If RDKit cannot parse a record. The record is not skipped because
        that would change the number of rows; the feature rows are presumably
        matched by position with labels read elsewhere — TODO confirm.
    """
    supplier = Chem.SDMolSupplier(input_sdf)
    features = []
    for position, mol in enumerate(supplier):
        if mol is None:
            # SDMolSupplier yields None for records it cannot parse.
            raise ValueError(f"RDKit could not parse record {position} of {input_sdf}")
        fp = MACCSkeys.GenMACCSKeys(mol)
        arr = np.zeros((0,), dtype=np.int8)
        # ConvertToNumpyArray resizes `arr` in place to the fingerprint length.
        DataStructs.ConvertToNumpyArray(fp, arr)
        features.append(arr)
    return np.array(features)

# Load cached MACCS fingerprints, computing and caching them on first use.
# np.save does not create missing folders, so make sure the cache folder exists.
os.makedirs("features", exist_ok=True)
features_maccs = {}
for set_name in ("shared", "only_2c9", "only_3a4"):
    cache_path = os.path.join("features", f"{set_name}_set_features_MACCS.npy")
    if os.path.exists(cache_path):
        features_maccs[set_name] = np.load(cache_path)
    else:
        features_maccs[set_name] = get_features(os.path.join(PATH_DATA, f"{set_name}_set_cyp.sdf"))
        np.save(cache_path, features_maccs[set_name])

features_shared = features_maccs["shared"]
features_only_2c9 = features_maccs["only_2c9"]
features_only_3a4 = features_maccs["only_3a4"]

#### RDKit

In [None]:
def get_features(input_sdf):
    """Compute RDKit topological fingerprints for every molecule of an SDF file.

    Parameters
    ----------
    input_sdf : str
        Path to the SDF file to read.

    Returns
    -------
    numpy.ndarray
        One int8 row per molecule, in file order.

    Raises
    ------
    ValueError
        If RDKit cannot parse a record. The record is not skipped because
        that would change the number of rows; the feature rows are presumably
        matched by position with labels read elsewhere — TODO confirm.
    """
    supplier = Chem.SDMolSupplier(input_sdf)
    features = []
    for position, mol in enumerate(supplier):
        if mol is None:
            # SDMolSupplier yields None for records it cannot parse.
            raise ValueError(f"RDKit could not parse record {position} of {input_sdf}")
        fp = Chem.RDKFingerprint(mol)
        arr = np.zeros((0,), dtype=np.int8)
        # ConvertToNumpyArray resizes `arr` in place to the fingerprint length.
        DataStructs.ConvertToNumpyArray(fp, arr)
        features.append(arr)
    return np.array(features)

# Load cached RDKit fingerprints, computing and caching them on first use.
# np.save does not create missing folders, so make sure the cache folder exists.
os.makedirs("features", exist_ok=True)
features_rdkit = {}
for set_name in ("shared", "only_2c9", "only_3a4"):
    cache_path = os.path.join("features", f"{set_name}_set_features_RDKIT.npy")
    if os.path.exists(cache_path):
        features_rdkit[set_name] = np.load(cache_path)
    else:
        features_rdkit[set_name] = get_features(os.path.join(PATH_DATA, f"{set_name}_set_cyp.sdf"))
        np.save(cache_path, features_rdkit[set_name])

features_shared = features_rdkit["shared"]
features_only_2c9 = features_rdkit["only_2c9"]
features_only_3a4 = features_rdkit["only_3a4"]

## Getting physicochemical descriptors

Molecular descriptors are widely employed to present molecular characteristics in cheminformatics. Various molecular-descriptor-calculation software programs have been developed.

- [Mordred](https://jcheminf.biomedcentral.com/articles/10.1186/s13321-018-0258-y) (To do)
- PaDEL - Descriptor
- ...


In [None]:
# Descriptor calculator with every registered 2D descriptor (3D ones skipped).
# `Calculator` and `descriptors` come from the import cell at the top.
calc = Calculator(descriptors, ignore_3D=True)

# Activity outcome and SMILES of the shared set as 1-D arrays.
# Series.to_numpy() is already 1-D, so no reshape/squeeze round trip is needed.
class_col = shared_data['p450-cyp2c9 Activity Outcome']
class_arr = class_col.to_numpy()
smi_col = shared_data['CanonicalSMILES']
smi_arr = smi_col.to_numpy()

# Sanity check: descriptors of a single molecule built from its SMILES.
mol = Chem.MolFromSmiles(smi_arr[300])
print(calc(mol)[:3])

# NOTE(review): `mols_short` was never defined in the notebook. It is assumed
# to be a small sample of the shared set; confirm the intended size/selection.
N_MOLS_SHORT = 100
mols_short = [Chem.MolFromSmiles(smi) for smi in smi_arr[:N_MOLS_SHORT]]

# Descriptors of the sample as a DataFrame (one row per molecule).
df_descriptors = calc.pandas(mols_short)

## Detecting and removing outliers

Before selecting features, we will remove the examples that contain outliers, using the Z-score criterion.

$Z = \frac{X-\mu}{\sigma}$

Z-scores can quantify the unusualness of an observation when your data follow the normal distribution. Z-scores are the number of standard deviations above and below the mean that each value falls. For example, a Z-score of 2 indicates that an observation is two standard deviations above the average while a Z-score of -2 signifies it is two standard deviations below the mean. A Z-score of zero represents a value that equals the mean. [Source](https://statisticsbyjim.com/basics/outliers/)

In [None]:
def compute_z_score(df_original):
    """Append a population z-score column for every column of a DataFrame.

    Works on a copy, so ``df_original`` is left untouched. For each original
    column ``c`` a column ``c_zscore`` holding ``(c - mean) / std`` (with
    ``ddof=0``) is added.

    Returns
    -------
    (DataFrame, list)
        The extended copy and the list of original column names.
    """
    scored = df_original.copy()
    headers = list(scored.columns)
    for name in headers:
        values = scored[name]
        centred = values - values.mean()
        scored[f'{name}_zscore'] = centred / values.std(ddof=0)
    return scored, headers

def outliers_detection(df, threshold=3):
    """Flag, per column, the values whose absolute z-score exceeds ``threshold``.

    Parameters
    ----------
    df : DataFrame
        Data to score; it is not modified.
    threshold : float
        Absolute z-score above which a value counts as an outlier.

    Returns
    -------
    (DataFrame, list)
        The frame returned by ``compute_z_score`` extended with one 0/1 column
        ``<zscore column>_outlier`` per z-score column, and the list of
        z-score column names.
    """
    df_scored, headers = compute_z_score(df)
    original_cols = set(headers)
    # Keep DataFrame order: a plain set difference would give a different
    # column order from one Python process to the next.
    zscore_col = [col for col in df_scored.columns if col not in original_cols]
    for col in zscore_col:
        df_scored[f'{col}_outlier'] = (df_scored[col].abs() > threshold).astype(int)
    return df_scored, zscore_col

def drop_outliers(df, threshold=3):
    """Return a copy of ``df`` without the rows that contain an outlier.

    A row is removed when at least one of its values has an absolute z-score
    above ``threshold`` (as flagged by ``outliers_detection``). The helper
    z-score and flag columns are removed from the result.

    Parameters
    ----------
    df : DataFrame
        Data to filter; it is not modified.
    threshold : float
        Absolute z-score above which a value counts as an outlier.

    Returns
    -------
    DataFrame
        The surviving rows with the original columns.
    """
    df_outlier, zscore_col = outliers_detection(df, threshold=threshold)
    flag_cols = [f'{col}_outlier' for col in zscore_col]
    # One positional mask for all columns instead of one in-place drop per
    # column; a positional mask also avoids removing non-outlier rows that
    # happen to share an index label with an outlier row.
    has_outlier = df_outlier[flag_cols].eq(1).any(axis=1).to_numpy()
    return df_outlier.loc[~has_outlier].drop(columns=list(zscore_col) + flag_cols)

#### !! The following function should work but it doesn't. 

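It is more elegant than the one above. Moreover, I think it could be more efficient.

The error that I get is:

*RuntimeWarning: invalid value encountered in less
This is separate from the ipykernel package so we can avoid doing imports until*

This warning means the z-score array contains NaN (for example for a constant column, whose z-score is 0/0). A NaN compared with the threshold is always `False`, so the `< threshold` test fails for every row and all rows are discarded.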

In [None]:
def drop_outliers(df_original, threshold=3):
    """Return the rows of ``df_original`` that contain no outlier.

    A value is an outlier when the absolute value of its column-wise z-score
    (``scipy.stats.zscore``) exceeds ``threshold``. Undefined z-scores (NaN,
    e.g. every value of a constant column) are treated as "not an outlier".

    Parameters
    ----------
    df_original : DataFrame
        Numeric data; it is not modified.
    threshold : float
        Absolute z-score above which a value counts as an outlier.

    Returns
    -------
    DataFrame
        The rows without outliers, with the original columns and index labels.
    """
    df = df_original.copy()
    # errstate silences the "invalid value" warnings raised by 0/0 z-scores
    # and by comparing NaN with the threshold.
    with np.errstate(divide='ignore', invalid='ignore'):
        zscores = np.asarray(stats.zscore(df.to_numpy(dtype=float), axis=0))
        # Testing "is an outlier" rather than "is below the threshold" keeps
        # NaN z-scores from discarding the row: NaN > threshold is False.
        is_outlier = np.abs(zscores) > threshold
    return df[~is_outlier.any(axis=1)]
    
# Remove outlier rows from the CYP2C9-only descriptors after casting them to float64.
# NOTE(review): `descriptors_only2c9` is not defined in the visible part of the
# notebook — confirm it is created before this cell runs.
# threshold=100 is far above the default of 3, so presumably only extreme values
# are meant to be removed — TODO confirm.
df_2 = drop_outliers(descriptors_only2c9.astype('float64'),threshold=100)

## Descriptors normalisation 

Here we have an example of how to normalise with the scikit-learn `normalize()` function.

Each column (containing a single descriptor) is divided by its maximum value.

This step should always be carried out after removing outliers.

In [None]:
# Scale each descriptor column (axis=0) with scikit-learn's 'max' norm.
# NOTE(review): `descriptors_shared` is not defined in the visible part of the
# notebook. Wrapping the result in a new DataFrame gives default integer
# row/column labels, so any descriptor names are not carried over — confirm
# this is intended.
norm_descriptors_shared = pd.DataFrame(normalize(descriptors_shared, norm='max', axis=0))

## Feature selection

There are mainly [two ways](https://machinelearningmastery.com/feature-selection-with-categorical-data/) of choosing the features of a dataset taking into account its labels (`select_features_univar()`):

- [Chi square](https://scikit-learn.org/stable/modules/feature_selection.html#univariate-feature-selection) (`chi2`): [Pearson test](https://stackoverflow.com/questions/25792012/feature-selection-using-scikit-learn) is only valid for positive values.
- [Mutual info classification](https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.mutual_info_classif.html) (`mutual_info_classif`)

Also we can remove all features whose variance doesn’t meet some threshold, independently of how are they classified.
By default, it removes all zero-variance features, i.e. features that have the same value in all samples (`select_features_threshold`):

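- [Removing features with low variance](https://scikit-learn.org/stable/modules/feature_selection.html#removing-features-with-low-variance): only features whose variance is above $p(1-p)$ are kept, where $p$ is the `threshold` argument. For a boolean feature and $p \geq 0.5$, this removes the features that take the same value in more than a fraction $p$ of the samples. The closer $p$ is to 0.5, the larger the variance threshold and the more restrictive the filter:

    $Var_{threshold} =  p(1-p)<Var_{x} \rightarrow Accepted$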

We can combine both ways to select features with `select_features_comb()`:
- First we select the features taking into account the labels of the data, using `select_features_univar()`.
- Then we apply a variance threshold to the selected features with `select_features_threshold()`.

In [15]:
def select_features_univar(X_train, Y_train, X_test, score_func=chi2, k_best=None, percentile=None):
    """Univariate, label-aware feature selection.

    The selector is fitted on the training data only and then applied to both
    the training and the test matrix.

    Parameters
    ----------
    X_train, Y_train : array-like
        Training features and labels used to fit the selector.
    X_test : array-like
        Test features, reduced to the same columns as ``X_train``.
    score_func : callable
        Scoring function: ``chi2`` (default) or ``mutual_info_classif``.
    k_best : int, optional
        Keep the ``k_best`` highest-scoring features. Takes precedence over
        ``percentile`` when both are given.
    percentile : int, optional
        Keep this percentage of the highest-scoring features.

    Returns
    -------
    (X_train_fs, X_test_fs, fs)
        The reduced matrices and the fitted selector.

    Raises
    ------
    ValueError
        If neither ``k_best`` nor ``percentile`` is given. Previously a message
        was printed and ``None`` returned, which made callers fail later when
        unpacking the result.
    """
    if k_best is not None:
        fs = SelectKBest(score_func=score_func, k=k_best)
    elif percentile is not None:
        fs = SelectPercentile(score_func=score_func, percentile=percentile)
    else:
        raise ValueError(
            "Provide either the number of best features to keep (`k_best`) or a `percentile`."
        )
    fs.fit(X_train, Y_train)
    X_train_fs = fs.transform(X_train)
    X_test_fs = fs.transform(X_test)
    return X_train_fs, X_test_fs, fs
    
def select_features_threshold(X_train, X_test, threshold=0.9):
    """Remove low-variance features without looking at the labels.

    A ``VarianceThreshold`` with cut-off ``threshold * (1 - threshold)`` is
    fitted on ``X_train`` and the same column selection is applied to
    ``X_test``.

    Returns
    -------
    (X_train_threshold, X_test_threshold)
        The two matrices restricted to the retained features.
    """
    variance_cutoff = threshold * (1 - threshold)
    selector = VarianceThreshold(threshold=variance_cutoff)
    reduced_train = selector.fit_transform(X_train)
    reduced_test = selector.transform(X_test)
    return reduced_train, reduced_test

def select_features_comb(X_train, Y_train, X_test, score_func=chi2, k_best=None, percentile=None, threshold=0.9):
    """Univariate selection followed by a variance filter.

    First ``select_features_univar`` keeps the best features according to
    ``score_func`` (``k_best`` or ``percentile`` must be given), then
    ``select_features_threshold`` removes the low-variance ones among them.

    Returns
    -------
    (X_train_comb, X_test_comb)
        Training and test matrices restricted to the surviving features.
    """
    # Forward the caller's scoring function; it used to be hard-coded to chi2,
    # so e.g. mutual_info_classif was silently ignored.
    X_train_fs, X_test_fs, _ = select_features_univar(
        X_train, Y_train, X_test, score_func=score_func, k_best=k_best, percentile=percentile
    )
    X_train_comb, X_test_comb = select_features_threshold(X_train_fs, X_test_fs, threshold=threshold)
    return X_train_comb, X_test_comb

def plot_score(fs, print_scores=False):
    """Bar plot of the score of every feature of a fitted selector.

    ``fs`` must expose ``scores_``; with ``print_scores=True`` each score is
    also printed, one line per feature.
    """
    scores = fs.scores_
    positions = list(range(len(scores)))
    if print_scores:
        for idx in positions:
            print('Feature %d: %f' % (idx, scores[idx]))
    plt.bar(positions, scores)
    plt.show()
    

## Hyperparameter tuning with TensorBoard

This code allows the exploration of the hyperparameters using TensorBoard.
The hyperparameters explored for an L-layer NN with dropout before the first hidden layer are:

- Number of hidden layers 
- Number of neurons for the hidden layer (all the hidden layers in the NN will have the same number of neurons).
- Dropout (to avoid overfitting)
- Optimizer
- L2 regularizer (to avoid overfitting)
- Learning rate (LR)

In [None]:
# Switch for the (long-running) search cell further down.
hyperparam_tunning = True

# Search space. Discrete domains list every candidate value; for the
# RealInterval domains the search loop only visits the two end points.
HP_HIDDEN_LAYERS = hp.HParam("hidden_layers", hp.Discrete(list(range(3, 10))))
HP_NEURONS = hp.HParam("neurons", hp.Discrete(list(range(10, 151, 20))))
HP_DROPOUT = hp.HParam("dropout", hp.RealInterval(0.2, 0.5))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'sgd', 'RMSprop']))
HP_L2 = hp.HParam('l2 regularizer', hp.RealInterval(.001, .01))
HP_LR = hp.HParam("learning_rate", hp.Discrete([0.001, 0.01, 0.1, 1.0, 10.0]))
# Width of the input layer = number of feature columns of the training matrix.
N_BITS = train_data.shape[1]
    
def construct_optimizer(hparams):
    """Instantiate the optimizer selected in ``hparams``.

    Parameters
    ----------
    hparams : dict
        Must contain ``HP_OPTIMIZER`` (``'adam'``, ``'sgd'`` or ``'RMSprop'``)
        and ``HP_LR`` (the learning rate).

    Returns
    -------
    The ``tf.keras`` optimizer built with that learning rate.

    Raises
    ------
    ValueError
        If the optimizer name is not supported (previously ``None`` was
        returned silently).
    """
    optimizer_classes = {
        "adam": tf.keras.optimizers.Adam,
        "sgd": tf.keras.optimizers.SGD,
        "RMSprop": tf.keras.optimizers.RMSprop,
    }
    name = hparams[HP_OPTIMIZER]
    if name not in optimizer_classes:
        raise ValueError(
            f"Unsupported optimizer {name!r}; expected one of {sorted(optimizer_classes)}"
        )
    return optimizer_classes[name](learning_rate=hparams[HP_LR])
    
def train_test_model(hparams):
    """Train one network for the given hyperparameters and score it twice.

    The model is an ``N_BITS``-wide relu input layer, one Dropout layer,
    ``HP_HIDDEN_LAYERS`` L2-regularised relu layers of ``HP_NEURONS`` units
    and a sigmoid output unit. It is trained for 10 epochs on
    ``train_data`` / ``train_labels`` with ``class_weight``.

    Returns
    -------
    (float, float)
        Accuracy on ``val_data`` and accuracy on ``mini_testing_2c9_data``.
    """
    hidden_stack = [tf.keras.layers.Dropout(hparams[HP_DROPOUT])]
    for _ in range(hparams[HP_HIDDEN_LAYERS]):
        hidden_stack.append(
            tf.keras.layers.Dense(
                hparams[HP_NEURONS],
                kernel_regularizer=tf.keras.regularizers.l2(hparams[HP_L2]),
                activation='relu',
            )
        )
    input_layer = tf.keras.layers.Dense(N_BITS, activation='relu', input_shape=(N_BITS,))
    output_layer = tf.keras.layers.Dense(1, activation="sigmoid")
    model = tf.keras.models.Sequential([input_layer] + hidden_stack + [output_layer])

    model.compile(optimizer=construct_optimizer(hparams), loss="binary_crossentropy", metrics=['accuracy'])
    model.fit(train_data, train_labels, epochs=10, verbose=2, class_weight=class_weight)

    # evaluate() returns [loss, accuracy]; only the accuracy is reported.
    acc_on_val = model.evaluate(val_data, val_labels, verbose=0)[1]
    acc_on_mini_test = model.evaluate(mini_testing_2c9_data, mini_labels_testing_2c9_data, verbose=0)[1]
    return acc_on_val, acc_on_mini_test


def run(run_dir, hparams):
    """Run one trial and log its hyperparameters and accuracies to ``run_dir``."""
    writer = tf.summary.create_file_writer(run_dir)
    with writer.as_default():
        # Record the values used in this trial.
        hp.hparams(hparams)
        scores = train_test_model(hparams)
        for tag, value in zip(("accuracy", "accuracy_val"), scores):
            tf.summary.scalar(tag, value, step=1)

In [None]:
# Declare the search space and the metrics for the HParams dashboard.
# The config goes into the same log directory the individual runs are written
# under and that `%tensorboard --logdir` points to; written to the experiment
# root, the dashboard would not pick it up.
HPARAM_LOG_DIR = "hyperparameters_tunning/morgan_mordred_norm_6060_feature_selection/logs/hparam_tuning"
os.makedirs(HPARAM_LOG_DIR, exist_ok=True)
with tf.summary.create_file_writer(HPARAM_LOG_DIR).as_default():
    hp.hparams_config(
        hparams=[HP_HIDDEN_LAYERS, HP_NEURONS, HP_DROPOUT, HP_OPTIMIZER, HP_L2, HP_LR],
        metrics=[
            hp.Metric("accuracy", display_name='Accuracy'),
            hp.Metric("accuracy_val", display_name="Validation_accuracy"),
        ],
    )

In [None]:
%tensorboard --logdir hyperparameters_tunning/morgan_mordred_norm_6060_feature_selection/logs/hparam_tuning/

In [None]:
if hyperparam_tunning:
    session_num = 0
    # Only the two end points of the continuous ranges are visited.
    dropout_bounds = [HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value]
    l2_bounds = [HP_L2.domain.min_value, HP_L2.domain.max_value]
    looping = list(product(
        HP_NEURONS.domain.values,
        HP_HIDDEN_LAYERS.domain.values,
        dropout_bounds,
        HP_OPTIMIZER.domain.values,
        l2_bounds,
        HP_LR.domain.values,
    ))
    # Visit the grid in random order.
    random.shuffle(looping)
    total_runs = len(looping)
    for neurons, hidden_lay, dropout, opt, l2, lr in looping:
        hp_params = {
            HP_NEURONS: neurons,
            HP_HIDDEN_LAYERS: hidden_lay,
            HP_DROPOUT: dropout,
            HP_OPTIMIZER: opt,
            HP_L2: l2,
            HP_LR: lr,
        }
        # clear everything every 10 models to avoid oom errors
        if session_num % 10 == 0:
            tf.keras.backend.clear_session()
        run_name = f"run_{session_num}"
        print(f"---Starting trial: {run_name} of {total_runs}")
        print({h.name: hp_params[h] for h in hp_params})
        run('hyperparameters_tunning/morgan_mordred_norm_6060_feature_selection/logs/hparam_tuning/' + run_name, hp_params)
        session_num += 1


#### !! The following version does not work at the moment... Should be fixed

This code allows the exploration of the number of neurons for a fixed number of layers (-> the function `train_test_model` has to be changed by adding or removing layers...)

- It used to fail with the sgd optimizer; that's why adagrad is used instead.

In [None]:
# For now the number of hidden layers is not a hyperparameter: the depth is
# fixed and only the width of each hidden layer is explored.
HP_NEURONS_1 = hp.HParam("neurons_1", hp.Discrete(list(range(5, 66, 5))))
HP_NEURONS_2 = hp.HParam("neurons_2", hp.Discrete(list(range(5, 66, 5))))
HP_NEURONS_3 = hp.HParam("neurons_3", hp.Discrete(list(range(5, 66, 5))))
# HP_NEURONS_4 and HP_NEURONS_5 are declared for deeper networks; the model
# defined below only reads the first three.
HP_NEURONS_4 = hp.HParam("neurons_4", hp.Discrete(list(range(5, 66, 5))))
HP_NEURONS_5 = hp.HParam("neurons_5", hp.Discrete(list(range(5, 66, 5))))

HP_DROPOUT = hp.HParam("dropout", hp.RealInterval(0.2, 0.5))
HP_OPTIMIZER = hp.HParam('optimizer', hp.Discrete(['adam', 'adagrad', 'RMSprop']))  # 'sgd' left out
HP_L2 = hp.HParam('l2_regularizer', hp.RealInterval(.001, .01))
HP_LR = hp.HParam("learning_rate", hp.Discrete([0.0001, 0.001, 0.01, 0.1, 1.0]))

os.makedirs("hyperparameters_tunning/morgan_mordred_feature_selection", exist_ok=True)
with tf.summary.create_file_writer('hyperparameters_tunning/morgan_mordred_feature_selection/logs/hparam_tuning').as_default():
    # HP_NEURONS_3 is listed too: the model and the search loop both use it.
    hp.hparams_config(
        hparams=[HP_NEURONS_1, HP_NEURONS_2, HP_NEURONS_3, HP_DROPOUT, HP_OPTIMIZER, HP_L2, HP_LR],
        metrics=[hp.Metric("accuracy", display_name='Accuracy')],
    )
    
def construct_optimizer(hparams):
    """Instantiate the optimizer selected in ``hparams``.

    Parameters
    ----------
    hparams : dict
        Must contain ``HP_OPTIMIZER`` (``'adam'``, ``'adagrad'`` or
        ``'RMSprop'``; ``'sgd'`` is deliberately not offered here) and
        ``HP_LR`` (the learning rate).

    Returns
    -------
    The ``tf.keras`` optimizer built with that learning rate.

    Raises
    ------
    ValueError
        If the optimizer name is not supported (previously ``None`` was
        returned silently).
    """
    optimizer_classes = {
        "adam": tf.keras.optimizers.Adam,
        "adagrad": tf.keras.optimizers.Adagrad,
        "RMSprop": tf.keras.optimizers.RMSprop,
    }
    name = hparams[HP_OPTIMIZER]
    if name not in optimizer_classes:
        raise ValueError(
            f"Unsupported optimizer {name!r}; expected one of {sorted(optimizer_classes)}"
        )
    return optimizer_classes[name](learning_rate=hparams[HP_LR])
    
def train_test_model(hparams):
    """Train a three-hidden-layer network and return its validation accuracy.

    Each hidden layer (``HP_NEURONS_1`` .. ``HP_NEURONS_3`` units, relu,
    L2-regularised) is preceded by a Dropout layer. The model is trained for
    5 epochs on ``train_data`` / ``train_labels`` and evaluated on
    ``val_data`` / ``val_labels``.

    Returns
    -------
    float
        Accuracy on the validation data.
    """
    internal_layers = []
    for hp_neurons in (HP_NEURONS_1, HP_NEURONS_2, HP_NEURONS_3):
        internal_layers.append(tf.keras.layers.Dropout(hparams[HP_DROPOUT]))
        internal_layers.append(
            tf.keras.layers.Dense(
                hparams[hp_neurons],
                kernel_regularizer=tf.keras.regularizers.l2(hparams[HP_L2]),
                activation='relu',
            )
        )

    model = tf.keras.models.Sequential(
        [tf.keras.layers.Dense(train_data.shape[1], activation='relu', input_shape=(train_data.shape[1],))]
        + internal_layers
        + [tf.keras.layers.Dense(1, activation="sigmoid")]
    )
    model.compile(
        optimizer=construct_optimizer(hparams),
        loss="binary_crossentropy",
        metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall()],
    )
    model.fit(train_data, train_labels, epochs=5, verbose=2)
    # With three compiled metrics evaluate() returns
    # [loss, accuracy, precision, recall]. Returning the whole list made
    # run() pass a list to tf.summary.scalar, so return the accuracy only.
    metric_values = model.evaluate(val_data, val_labels)
    return metric_values[1]

def run(run_dir, hparams):
    """Run one trial and log it to ``run_dir``; do nothing if ``run_dir`` already exists."""
    already_logged = os.path.exists(run_dir)
    if not already_logged:
        writer = tf.summary.create_file_writer(run_dir)
        with writer.as_default():
            # Record the values used in this trial.
            hp.hparams(hparams)
            accuracy = train_test_model(hparams)
            tf.summary.scalar("accuracy", accuracy, step=1)

In [None]:
hyperparam_tunning = True

if hyperparam_tunning:
    session_num = 0
    # Only the two end points of the continuous ranges are visited.
    dropout_bounds = [HP_DROPOUT.domain.min_value, HP_DROPOUT.domain.max_value]
    l2_bounds = [HP_L2.domain.min_value, HP_L2.domain.max_value]
    looping = list(product(
        HP_NEURONS_1.domain.values,
        HP_NEURONS_2.domain.values,
        HP_NEURONS_3.domain.values,
        dropout_bounds,
        HP_OPTIMIZER.domain.values,
        l2_bounds,
        HP_LR.domain.values,
    ))
    # Visit the grid in random order.
    random.shuffle(looping)
    # The loop visits every combination, so the progress message must report
    # the real total (it used to print a hard-coded 5).
    total_runs = len(looping)
    for neurons_1, neurons_2, neurons_3, dropout, opt, l2, lr in looping:
        hp_params = {
            HP_NEURONS_1: neurons_1,
            HP_NEURONS_2: neurons_2,
            HP_NEURONS_3: neurons_3,
            HP_DROPOUT: dropout,
            HP_OPTIMIZER: opt,
            HP_L2: l2,
            HP_LR: lr,
        }
        if session_num % 10 == 0:
            # clear everything every 10 models to avoid oom errors
            tf.keras.backend.clear_session()
        run_name = f"run_{session_num}"
        print(f"---Starting trial: {run_name} of {total_runs}")
        print({h.name: hp_params[h] for h in hp_params})
        run('hyperparameters_tunning/morgan_mordred_feature_selection/logs/hparam_tuning/' + run_name, hp_params)
        session_num += 1