In [1]:
import os
os.system('color')
import random
from termcolor import colored
from typing import List, Optional
from copy import deepcopy
import csv
from enum import Enum

import tensorflow as tf
from keras import layers, Model
import numpy as np
from sklearn.preprocessing import OneHotEncoder
# import matplotlib.pyplot as plt
# from matplotlib.ticker import MaxNLocator

import utility as ut

os.system('color')
random.seed(0)
np.random.seed(0)

DATAPATH = os.path.join(os.getcwd(), "newdatasets")
RESULTS_PATH = os.path.join(os.getcwd(), "autoencoder_results")

sh: 1: color: not found
2023-06-08 12:56:15.613178: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-08 12:56:15.717463: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-06-08 12:56:16.266009: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/rkannan/miniconda3/envs/richard_tf/lib/
2023-06-08 12:56

In [2]:
# =============================================================================
class PaddingType(Enum):
    ZERO=1
    TRUNCATE=2
    EMPTY=3

# =============================================================================
class NoisificationMethod(Enum):
    RANDOMSCATTER=1
    RANDOMCONTIG=2
# =============================================================================
def highlight_indices(seq: np.array, indices: np.array, color: str):
    # We use deepcopy to prevent mutation and we cast to object so that we can treat contents as python strings
    # otherwise, it gets messed up as it treats each element as a single character
    newseq = deepcopy(seq).astype('object')
    newseq[indices] = np.vectorize(lambda x: colored(x, color, attrs=["bold"]))(seq[indices])
    return newseq

# =============================================================================
def print_sequence(seq, 
                   header: str=None, 
                   incorrect_indices: Optional[np.array]=None, 
                   correct_indices: Optional[np.array]=None):

    newseq = deepcopy(seq)
    if correct_indices is not None and correct_indices.size != 0:
        newseq = highlight_indices(newseq, correct_indices, "green")
    if incorrect_indices is not None and incorrect_indices.size != 0:
        newseq = highlight_indices(newseq, incorrect_indices, "red")

    line_length = 40
    if header:
        print(header)
    print("=" * line_length)

    i = 0
    while i < len(newseq):
        print(" ".join(newseq[i: i+line_length]))
        i += line_length

# =============================================================================
def get_sequences(fasta_file: str) -> List[np.array]:
    sequences = []
    lines = []
    with open(fasta_file, "r") as input_file:
        lines = list(filter(None, input_file.read().split("\n")))

    parts = []
    for line in lines:
        if line.startswith(">"):
            if parts:
                sequences.append(np.array([c for c in "".join(parts)]))
            parts = []
        else:
            parts.append(line)
    if parts:
        sequences.append(np.array([c for c in "".join(parts)]))
    return sequences

# =============================================================================
def snake_case_prettify(s):
    return " ".join(w.capitalize() for w in s.split("_"))

# =============================================================================
def save_models(models):
    modeldir = os.path.join(os.getcwd(), "1Dmodels")
    if not os.path.exists(modeldir):
        os.makedirs(modeldir)

    for name, model in models.items():
        model.save(os.path.join(modeldir, name))

# =============================================================================
def load_models():
    modeldir = os.path.join(os.getcwd(), "1Dmodels")
    if os.path.exists(modeldir):
        return {f: tf.keras.models.load_model(os.path.join(modeldir, f)) for f in os.listdir(modeldir)}
    return {}

# =============================================================================
def print_diff_between_target_and_de_novo():

    dataset_dir = os.path.join(os.getcwd(), "newdatasets")
    
    de_novo_sequence = get_sequences(os.path.join(dataset_dir, "denovo_0.20_6.txt"))[0]
    target_sequence = get_sequences(os.path.join(dataset_dir, "target_sequence.txt"))[0]

    gap_indices = np.where(de_novo_sequence == '-')[0]
    incorrect_indices = np.where(de_novo_sequence != target_sequence)[0]
    correct_indices = np.where(de_novo_sequence == target_sequence)[0]
    incorrect_non_gaps = np.setdiff1d(incorrect_indices, gap_indices)
    print(f"Length of target: {len(target_sequence)}")
    print(f"Number of incorrect non-gaps: {len(incorrect_non_gaps)}")
    print(f"Number of gaps: {len(gap_indices)}")

    print_sequence(de_novo_sequence, "Protein Scaffold", incorrect_indices, correct_indices)

In [3]:
def KT_hp_build(hp):
    
    conv1_filter_size_ = hp.Int("conv1_filter_size",min_value=4, max_value=10,step = 1)
    conv2_filter_size_ = hp.Int("conv2_filter_size",min_value=4, max_value=10,step = 1)
    conv1_filters_ = hp.Int("conv1_filters",min_value=30, max_value=50,step = 4)
    conv2_filters_ = hp.Int("conv2_filters",min_value=50, max_value=100,step = 4)
    bridge_filters_ = hp.Int("bridge_filters",min_value=120, max_value=200,step = 8)
    bridge_filter_size_ = hp.Int("bridge_filter_size",min_value=4, max_value=10,step = 1)
    max_pool_ = hp.Int("max_pool",min_value=2, max_value=4,step = 1)
    dropout_ = hp.Choice("dropout_: ",[0.25, 0.35, 0.40,0.50])
    # =============================================================================
    class Encoder(layers.Layer):
        """Encoder part of autoencoder"""

        # -------------------------------------------------------------------------
        def __init__(self, conv1_filters, conv2_filters, conv1_filter_size=conv1_filter_size_, conv2_filter_size=conv2_filter_size_, maxpool=2, dropout=0.25, name="encoder", **kwargs):
            super().__init__(name=name, **kwargs)

            self.conv1 = layers.Conv1D(conv1_filters, conv1_filter_size, padding="same", activation="relu")
            self.conv2 = layers.Conv1D(conv2_filters, conv2_filter_size, padding="same", activation="relu")
            self.maxpool = layers.MaxPooling1D(maxpool, padding="same")
            self.dropout = layers.Dropout(dropout)

        # -------------------------------------------------------------------------
        def call(self, inputs):
            return self.dropout(self.maxpool(self.conv2(self.dropout(self.maxpool(self.conv1(inputs))))))

    # =============================================================================
    class Decoder(layers.Layer):
        """Decoder part of autoencoder"""

        # -------------------------------------------------------------------------
        def __init__(self, conv1_filters, conv2_filters, conv1_filter_size=conv1_filter_size_, conv2_filter_size=conv2_filter_size_, maxpool=2, dropout=0.25, name="decoder", **kwargs):
            super().__init__(name=name, **kwargs)

            self.conv1 = layers.Conv1DTranspose(conv1_filters, conv1_filter_size, padding="same", activation="relu")
            self.conv2 = layers.Conv1DTranspose(conv2_filters, conv2_filter_size, padding="same", activation="relu")
            self.upsample = layers.UpSampling1D(maxpool)
            self.dropout = layers.Dropout(dropout)

        # -------------------------------------------------------------------------
        def call(self, inputs):
            return self.dropout(self.upsample(self.conv2(self.dropout(self.upsample(self.conv1(inputs))))))

    # =============================================================================
    class Autoencoder(Model):
        """Autoencoder"""
        # -------------------------------------------------------------------------
        def __init__(self, num_classes, name="autoencoder", **kwargs):
            super().__init__(name=name, **kwargs)

            self.num_classes = num_classes
            self.hyperparameters = {
                "conv1_filters": conv1_filters_,#32,
                "conv2_filters": conv2_filters_,#64,
                "conv1_filter_size": conv1_filter_size_,#5,
                "conv2_filter_size": conv2_filter_size_,#5,
                "bridge_filters": bridge_filters_,#128,
                "bridge_filter_size": bridge_filter_size_,#5,
                "dropout": dropout_,#0.25,
                "maxpool": max_pool_#2,
            }

            hp = self.hyperparameters

            self.encoder = Encoder(hp["conv1_filters"], hp["conv2_filters"])
            self.bridge = layers.Conv1D(hp["bridge_filters"], hp["bridge_filter_size"], padding="same", activation="relu")
            self.decoder = Decoder(hp["conv2_filters"], hp["conv1_filters"])
            self.finallayer = layers.Conv1D(self.num_classes, hp["conv1_filter_size"], padding="same", activation="softmax")

        # -------------------------------------------------------------------------
        def call(self, inputs):
            return self.finallayer(self.decoder(self.bridge(self.encoder(inputs))))
    
    import numpy as np
    import tensorflow as tf
    from tensorflow.keras import layers, models

    class Transformer(tf.keras.Model):
        def __init__(self, input_shape, head_size, num_heads, ff_dim, num_transformer_blocks, mlp_units, dropout=0, mlp_dropout=0):
            super(Transformer, self).__init__()
            self.input_shape = input_shape
            self.head_size = head_size
            self.num_heads = num_heads
            self.ff_dim = ff_dim
            self.num_transformer_blocks = num_transformer_blocks
            self.mlp_units = mlp_units
            self.dropout = dropout
            self.mlp_dropout = mlp_dropout

        def transformer_encoder(self, inputs):
            # Normalization and Attention
            x = layers.LayerNormalization(epsilon=1e-6)(inputs)
            x = layers.MultiHeadAttention(
                key_dim=self.head_size, num_heads=self.num_heads, dropout=self.dropout
            )(x, x)
            x = layers.Dropout(self.dropout)(x)
            res = x + inputs

            # Feed Forward Part
            x = layers.LayerNormalization(epsilon=1e-6)(res)
            x = layers.Conv1D(filters=self.ff_dim, kernel_size=1, activation="relu")(x)
            x = layers.Dropout(self.dropout)(x)
            x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
            return x + res

        def build_model(self):
            inputs = tf.keras.Input(shape=self.input_shape)
            x = inputs
            for _ in range(self.num_transformer_blocks):
                x = self.transformer_encoder(x)

            x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
            for dim in self.mlp_units:
                x = layers.Dense(dim, activation="relu")(x)
                x = layers.Dropout(self.mlp_dropout)(x)
            outputs = layers.Dense(np.prod(self.input_shape), activation="softmax")(x)
            outputs = layers.Reshape(target_shape=self.input_shape, name='out_recon')(outputs)
            return models.Model(inputs, outputs)

        def call(self, inputs):
            x = inputs
            for _ in range(self.num_transformer_blocks):
                x = self.transformer_encoder(x)

            x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
            for dim in self.mlp_units:
                x = layers.Dense(dim, activation="relu")(x)
                x = layers.Dropout(self.mlp_dropout)(x)
            outputs = layers.Dense(np.prod(self.input_shape), activation="softmax")(x)
            outputs = layers.Reshape(target_shape=self.input_shape, name='out_recon')(outputs)
            return outputs
        
    # =============================================================================
    class ProteinScaffoldFixer():
        """Class to correct errors in a protein scaffold and fill gaps"""

        # -------------------------------------------------------------------------
        def __init__(self, 
                    output_seqs, 
                    paddingtype=PaddingType.EMPTY, 
                    noise_percent=0.2, 
                    noisemethod=NoisificationMethod.RANDOMSCATTER, 
                    numgaps=5, 
                    mingapsize=3, 
                    mincontigsize=1, 
                    epochs=200,
                    optimizer="adam",
                    early_stopping=False):

            self.epochs = epochs
            self.optimizer = optimizer
            self.paddingtype = paddingtype
            self.noise_percent = noise_percent
            self.early_stopping = early_stopping

            self.classes = np.array(["-", 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Y', 'Z'])
            self.ohe = OneHotEncoder(sparse_output=False, categories=[self.classes])

            self.max_seq_length = max((len(seq) for seq in output_seqs))
            self.maxpool = 2
            while self.max_seq_length % (self.maxpool * self.maxpool) != 0:
                self.max_seq_length += 1

            self.output_seqs = output_seqs
            self.input_seqs = self.noisify_sequences(output_seqs, noise_percent, noisemethod, numgaps, mingapsize, mincontigsize)

            self.train_x = self.preprocess_sequences(self.input_seqs)
            self.train_y = self.preprocess_sequences(self.output_seqs)

            self.history = None

            self.autoencoder = Autoencoder(len(self.classes))

        # =============================================================================
        def noisify_sequences(self, seqs, noise_percent, noisemethod, numgaps=5, mingapsize=3, mincontigsize=3):
            if noisemethod == NoisificationMethod.RANDOMSCATTER:
                return self.noisify_by_random_scatter(seqs, noise_percent)
            elif noisemethod == NoisificationMethod.RANDOMCONTIG:
                return self.noisify_by_random_contigs(seqs, noise_percent, numgaps, mingapsize, mincontigsize)

        # =============================================================================
        def noisify_by_random_scatter(self, seqs, noise_percent):
            sequences = deepcopy(seqs)
            # To noisify our input data, we will replace random amino acids with something else
            for seq in sequences:

                # We randomly sample from all the possible indices of seq
                indices_to_replace = random.sample(range(len(seq)), int(noise_percent * len(seq)))
                seq[indices_to_replace] = "-"

            return sequences

        # =============================================================================
        def noisify_by_random_contigs(self, seqs, noise_percent, numgaps, mingapsize, mincontigsize):

            sequences = deepcopy(seqs)
            for seqind, seq in enumerate(sequences):

                amino_acid_length = len(seq)
                amino_acids_to_replace = int(amino_acid_length * noise_percent)

                # The idea is to build a gap queue, randomly putting each amino acid to replace into each gap
                # Then, we build a contig queue, randomly putting each amino acid into each contig
                gap_queue = np.zeros(numgaps).astype(int)
                for _ in range(amino_acids_to_replace):

                    # Any gap can be considered
                    valid_gaps = np.arange(numgaps)

                    # ... so long as we have no underfilled gaps
                    underfilled = gap_queue < mingapsize
                    # If we have any underfilled, then consider only those until they are no longer underfilled
                    if np.any(underfilled):
                        valid_gaps = valid_gaps[np.where(underfilled)]

                    # Once valid gap indices have been determined, randomly pick one to increment
                    gap_queue[np.random.choice(valid_gaps)] += 1

                # There can always be one more contig than gaps (if there's a contig at beginnning and end of sequence)
                contig_queue = np.zeros(numgaps+1).astype(int)

                # We have to allocate all the amino acids NOT in gaps into contigs BETWEEN the gaps
                for _ in range(amino_acid_length - amino_acids_to_replace):

                    # Any contig can be considered
                    valid_contigs = np.arange(len(contig_queue))

                    # ... so long as we have no underfilled contigs
                    underfilled = contig_queue < mincontigsize

                    # The exceptions are the first and last contigs. They are never considered underfilled
                    underfilled[0] = False
                    underfilled[-1] = False

                    # If we have any underfilled, then consider only those until they are no longer underfilled
                    if np.any(underfilled):
                        valid_contigs = valid_contigs[np.where(underfilled)]

                    # Once valid gap indices have been determined, randomly pick one to increment
                    contig_queue[np.random.choice(valid_contigs)] += 1

                # Once we have determined gap_queue and contig_queue, we iterate over them to set the gaps equal
                # to our blank amino acid character

                sequence_pointer = 0
                iscontig=True
                while sequence_pointer < len(seq):
                    if iscontig:
                        # Don't do anything for the contig except increment the pointer and pop off the contig queue
                        sequence_pointer += contig_queue[0]
                        contig_queue = np.delete(contig_queue, 0)
                    else:
                        seq[sequence_pointer:sequence_pointer+gap_queue[0]] = '-'
                        sequence_pointer += gap_queue[0]
                        gap_queue = np.delete(gap_queue, 0)

                    # We alternate between contigs and gaps
                    iscontig = not iscontig

            return sequences

        # -------------------------------------------------------------------------
        def predict_sequence(self, seq, predict_only_gaps):

            scaffold = self.preprocess_sequences([seq])
            pred = self.autoencoder.predict(scaffold).reshape(self.max_seq_length, len(self.classes))

            # Set the probability of empty "-" to zero, since we always want to predict something
            emptyclass = np.where(self.ohe.transform(np.array("-").reshape(-1, 1))[0])[0][0]
            pred[:, emptyclass] = 0.0

            # Convert the probability distribution to a one-hot encoded vector
            mask = pred == np.amax(pred, axis=1).reshape(pred.shape[0], 1)
            indices = list((i, np.where(mask[i])[0][0]) for i in range(mask.shape[0]))

            pred = np.zeros(pred.shape)
            for i in indices:
                pred[i] = 1

            # Then we can use our one hot encoder to convert back to the original sequence of classes
            pred = self.ohe.inverse_transform(pred[:len(seq), :len(self.classes)]).reshape(len(seq))

            if predict_only_gaps:
                # We only care about predicting the gaps in seq, so replace amino acids in prediction with original nongaps
                nongaps = np.where(seq != '-')[0]
                pred[nongaps] = seq[nongaps]

            return pred

        # =============================================================================
        def preprocess_sequences(self, seqs: List[np.array]) -> np.array:

            # the value -1 lets numpy know to infer the shape. So it's just a column vector of length num_samples
            seqs = [np.array(seq).reshape(-1, 1) for seq in seqs]

            # One-hot encode each sequence
            seqs = [self.ohe.fit_transform(seq) for seq in seqs]

            if self.paddingtype == PaddingType.ZERO:
                # The sequences may have different lengths, so we will pad them with zeros
                # We are padding to fill up to max length, then we can turn it into a single numpy tensor
                return np.array([np.pad(seq, ((0, self.max_seq_length-len(seq)), (0, 0))) for seq in seqs])

            elif self.paddingtype == PaddingType.EMPTY:

                # We pad with the "empty" class
                emptyclass = self.ohe.fit_transform(np.array("-").reshape(-1, 1))[0]

                # dynamically extend each seq by enough emptyvals to make a single sequence length
                return np.array([np.vstack((seq, *(emptyclass for _ in range(self.max_seq_length-len(seq))))) for seq in seqs])

            elif self.paddingtype == PaddingType.TRUNCATE:
                # FIXME: Implement me
                raise Exception("Have not implemented Padding Type TRUNCATE!")
                
         # =============================================================================
        def train(self, verbose="auto", optimizer="adam", epochs=None):
            self.optimizer = optimizer
            if epochs:
                self.epochs = epochs

            self.autoencoder.compile(optimizer=self.optimizer, loss="categorical_crossentropy", metrics=["accuracy"])
            callbacks = []
            if self.early_stopping:
                callbacks.append(tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10))
            self.history = self.autoencoder.fit(self.train_x, self.train_y, epochs=self.epochs, validation_split=0.15, verbose=verbose, callbacks=callbacks)
            
            
    ut.mkdir_if_not_exists(RESULTS_PATH)
    ut.mkdir_if_not_exists(DATAPATH)

    epochs = 200
    noise_percent = 0.25

    cwd = os.getcwd()

    # change to the location of the data for reading
    os.chdir(DATAPATH)
    de_novo_sequence = get_sequences("de_novo_sequence.txt")[0]
    target_sequence = get_sequences("target_sequence.txt")[0]
    alltrainingdata = get_sequences("training_sequences.txt")
    metadata = "training_sequences_metadata.csv"

    # then change to the results directory for writing
    os.chdir(RESULTS_PATH)

    sequences_to_train_on = [1000]
    # sequences_to_train_on = [100, 500, 1000, 2000]

    trainfilename = "cda_breakdown_breakdown_of_numtrain.csv"
    headers = ["Num Training Instances", "Full Accuracy", "Gap Accuracy", "Nongap Accuracy"]
    with open(trainfilename, "w+", encoding="utf-8", newline="") as f:
        train_writer = csv.writer(f)
        train_writer.writerow(headers)

    train_file = open(trainfilename, "a+", encoding="utf-8", newline="")
    train_writer = csv.writer(train_file)

    for numtrain in sequences_to_train_on:

        trainingdata = alltrainingdata[:numtrain]
        random.shuffle(trainingdata)

        incorrect_indices = np.where(de_novo_sequence != target_sequence)[0]
        correct_indices = np.where(de_novo_sequence == target_sequence)[0]

        fixer = ProteinScaffoldFixer(trainingdata, noise_percent=noise_percent, epochs=epochs)
        fixer.autoencoder.compile(optimizer=fixer.optimizer, loss="categorical_crossentropy", metrics=["accuracy"])
        print(type(fixer.autoencoder))
    return fixer.autoencoder

In [4]:
import keras_tuner as kt
KT_hp_build(kt.HyperParameters())

<class '__main__.KT_hp_build.<locals>.Autoencoder'>


2023-06-08 12:56:53.312266: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-06-08 12:56:55.061815: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14759 MB memory:  -> device: 0, name: Quadro RTX 5000, pci bus id: 0000:19:00.0, compute capability: 7.5
2023-06-08 12:56:55.062501: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 14759 MB memory:  -> device: 1, name: Quadro RTX 5000, pci bus id: 0000:1a:00.0, compute capability: 7.5
2023-06-08 12:56:55.062998: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/tas

<__main__.KT_hp_build.<locals>.Autoencoder at 0x7fd185c28820>

In [38]:
# import pickle  
  
# # Open a file and use dump()
# with open('train_x.pkl', 'wb') as file:
      
#     # A new file will be created
#     pickle.dump(train_x, file)

#     # Open a file and use dump()
# with open('train_y.pkl', 'wb') as file:
      
#     # A new file will be created
#     pickle.dump(train_y, file)

In [2]:
import pickle
  
# Open the file in binary mode
with open('train_x.pkl', 'rb') as file:
      
    # Call load method to deserialze
    train_x = pickle.load(file)

    # Open the file in binary mode
with open('train_y.pkl', 'rb') as file:
      
    # Call load method to deserialze
    train_y = pickle.load(file)
print(train_x.shape)
print(train_y.shape)

(1000, 440, 24)
(1000, 440, 24)


In [6]:
train_x.shape[1:]

(440, 24)

In [6]:
#initialize the tuner
tuner = kt.RandomSearch(
    hypermodel= KT_hp_build,
    objective="val_accuracy", 
    overwrite = True,# Do not resume the previous search in the same directory.
    max_trials= 15,
    directory = "models/param_tuning",  # Set a directory to store the intermediate results.
    project_name= "param_tuning"
)

<class '__main__.KT_hp_build.<locals>.Autoencoder'>


In [7]:
tuner.search_space_summary()

Search space summary
Default search space size: 8
conv1_filter_size (Int)
{'default': None, 'conditions': [], 'min_value': 4, 'max_value': 10, 'step': 1, 'sampling': 'linear'}
conv2_filter_size (Int)
{'default': None, 'conditions': [], 'min_value': 4, 'max_value': 10, 'step': 1, 'sampling': 'linear'}
conv1_filters (Int)
{'default': None, 'conditions': [], 'min_value': 30, 'max_value': 50, 'step': 4, 'sampling': 'linear'}
conv2_filters (Int)
{'default': None, 'conditions': [], 'min_value': 50, 'max_value': 100, 'step': 4, 'sampling': 'linear'}
bridge_filters (Int)
{'default': None, 'conditions': [], 'min_value': 120, 'max_value': 200, 'step': 8, 'sampling': 'linear'}
bridge_filter_size (Int)
{'default': None, 'conditions': [], 'min_value': 4, 'max_value': 10, 'step': 1, 'sampling': 'linear'}
max_pool (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 4, 'step': 1, 'sampling': 'linear'}
dropout_:  (Choice)
{'default': 0.25, 'conditions': [], 'values': [0.25, 0.35, 0.4

In [8]:
#BATCH_SIZE = 64
MAX_EPOCH_LENGTH = 200
tuner.search(
    train_x,
    train_y,
    epochs=MAX_EPOCH_LENGTH,
    batch_size=64,
    verbose=1,
    validation_split=0.15,
    callbacks = [(tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, mode='min'))]
)

Trial 15 Complete [00h 01m 20s]
val_accuracy: 0.97404545545578

Best val_accuracy So Far: 0.9742878675460815
Total elapsed time: 00h 19m 48s
INFO:tensorflow:Oracle triggered exit


In [9]:
tuner.results_summary()

Results summary
Results in models/param_tuning/param_tuning
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x7fd185aabd00>
Trial summary
Hyperparameters:
conv1_filter_size: 5
conv2_filter_size: 9
conv1_filters: 46
conv2_filters: 90
bridge_filters: 160
bridge_filter_size: 5
max_pool: 2
dropout_: : 0.5
Score: 0.9742878675460815
Trial summary
Hyperparameters:
conv1_filter_size: 10
conv2_filter_size: 10
conv1_filters: 50
conv2_filters: 74
bridge_filters: 192
bridge_filter_size: 5
max_pool: 2
dropout_: : 0.5
Score: 0.974060595035553
Trial summary
Hyperparameters:
conv1_filter_size: 4
conv2_filter_size: 8
conv1_filters: 38
conv2_filters: 94
bridge_filters: 120
bridge_filter_size: 5
max_pool: 4
dropout_: : 0.25
Score: 0.97404545545578
Trial summary
Hyperparameters:
conv1_filter_size: 4
conv2_filter_size: 5
conv1_filters: 46
conv2_filters: 78
bridge_filters: 168
bridge_filter_size: 5
max_pool: 3
dropout_: : 0.35
Score: 0.9734696745872498
Trial summary
Hyperparameters: