# Keras Tuner Bars

In [1]:
import h5py
import numpy as np
import tensorflow
from tensorflow import keras
import kerastuner

print("tensorflow", tensorflow.__version__)
print("keras", keras.__version__)
print("keras-tuner", kerastuner.__version__)

tensorflow 2.1.0
keras 2.2.4-tf
keras-tuner 1.0.1


In [2]:
class HDF5DataGeneratorBars(keras.utils.Sequence):
    def __init__(self, file, num_classes, batch_size=200, mode="train", train_test_split=0.8):
        self.file = h5py.File(file, "r")
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.mode = mode

        batches = int(np.floor(self.file["multiplicity"].len() / self.batch_size))
        if self.mode == "train":
            self.num_batches = int(batches * train_test_split)
            self.offset = 0
        elif self.mode == "test":
            self.num_batches = int(batches * (1 - train_test_split))
            self.offset = int(batches * train_test_split)
        else:
            raise

    def __len__(self):
        return self.num_batches

    def __getitem__(self, index):
        x, y = self.getbatch(index)
        np.place(y, y >= self.num_classes, self.num_classes - 1)  # FIXME
        y = keras.utils.to_categorical(y, num_classes=self.num_classes)
        return x, y

    def getbatch(self, index):
        "Generate one batch of data"
        start = self.batch_size * (index + self.offset)
        end = self.batch_size * (index + self.offset + 1)
        
        # all the bars PLUS the trifeatures
        x1 = np.array(self.file["flatfeatures"][start:end])
        x2 = np.array(self.file["consolidated"][start:end])
        x = np.append(x1, x2, axis=1)
        
        # multiplicity column 2 -> label = 'nPH'
        y = np.array(self.file["multiplicity"][start:end, [2]].ravel())
        
        return x, y

In [3]:
class Model(kerastuner.HyperModel):
    def __init__(self, num_classes):
        self.num_classes = num_classes

    def build(self, hp):
        model = keras.models.Sequential()
        model.add(
            keras.layers.Dense(
                units=hp.Int(f"units_1", min_value=4000, max_value=14000, step=2000), activation="relu",
            )
        )
        model.add(
            keras.layers.Dense(
                units=hp.Int(f"units_2", min_value=2000, max_value=10000, step=1000), activation="relu",
            )
        )
        model.add(
            keras.layers.Dense(
                units=250, activation="relu",
            )
        )
        model.add(keras.layers.Dense(units=self.num_classes, activation="softmax"))

        optimizer = keras.optimizers.Adam(hp.Choice("learning_rate", values=[1e-1, 1e-2, 1e-3]))
        loss = keras.losses.CategoricalCrossentropy()

        model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])

        return model

In [4]:
num_dp = 30
max_neutrons = 5
num_classes = max_neutrons + 1

file = f"data/600AMeV_{num_dp}dp.bars-shuffled.h5"
generator = HDF5DataGeneratorBars(file, num_classes=num_classes, mode='train')
test_generator = HDF5DataGeneratorBars(file, num_classes=num_classes, mode='test')
model = Model(num_classes)

#tuner = kerastuner.tuners.BayesianOptimization(
tuner = kerastuner.tuners.RandomSearch(
    model,
    objective="val_accuracy",
    max_trials=20,
    executions_per_trial=1,
    directory="tmp-kerastuner-V2",
    project_name="tunebars3",
)
tuner.search_space_summary()

INFO:tensorflow:Reloading Oracle from existing project tmp-kerastuner-V2\tunebars3\oracle.json


In [None]:
tuner.search(
    generator,
    validation_data = test_generator,
    verbose=2,
)

  ...
    to  
  ['...']
Train for 20000 steps, validate for 4999 steps
20000/20000 - 2363s - loss: 3504.1054 - accuracy: 0.2031 - val_loss: 1.6328 - val_accuracy: 0.2045


  ...
    to  
  ['...']
Train for 20000 steps, validate for 4999 steps
20000/20000 - 2145s - loss: 3055.1761 - accuracy: 0.4603 - val_loss: 1.4506 - val_accuracy: 0.3454


  ...
    to  
  ['...']
Train for 20000 steps, validate for 4999 steps
20000/20000 - 2238s - loss: 364612.4700 - accuracy: 0.6633 - val_loss: 0.7263 - val_accuracy: 0.6812


  ...
    to  
  ['...']
Train for 20000 steps, validate for 4999 steps
20000/20000 - 2820s - loss: 52667.2703 - accuracy: 0.3859 - val_loss: 1.6316 - val_accuracy: 0.2044


  ...
    to  
  ['...']
Train for 20000 steps, validate for 4999 steps
20000/20000 - 2812s - loss: 603789.7836 - accuracy: 0.6642 - val_loss: 0.7346 - val_accuracy: 0.6765


  ...
    to  
  ['...']
Train for 20000 steps, validate for 4999 steps
20000/20000 - 2075s - loss: 9823.3722 - accuracy: 0.2032 - val_loss: 1.6346 - val_accuracy: 0.2045


  ...
    to  
  ['...']
Train for 20000 steps, validate for 4999 steps
20000/20000 - 2482s - loss: 237782.6208 - accuracy: 0.6590 - val_loss: 0.7384 - val_accuracy: 0.6794


  ...
    to  
  ['...']
Train for 20000 steps, validate for 4999 steps
20000/20000 - 2579s - loss: 14554.6767 - accuracy: 0.6730 - val_loss: 0.6928 - val_accuracy: 0.7018


  ...
    to  
  ['...']
Train for 20000 steps, validate for 4999 steps


In [None]:
tuner.results_summary()