# Keras Tuner Bars

In [1]:
import numpy as np
import pandas as pd
import sklearn.preprocessing
import time
import sys
import copy

# Extend the import path with the parent directory before importing the
# project-local `helpers` module (presumably it lives one level up — confirm).
sys.path.append("..")
from helpers import filename_for

In [2]:
import tensorflow
from tensorflow import keras
import kerastuner

# Report the versions of the libraries this notebook runs against.
for shown_name, module in (
    ("tensorflow", tensorflow),
    ("keras", keras),
    ("keras-tuner", kerastuner),
):
    print(shown_name, module.__version__)

tensorflow 2.1.0
keras 2.2.4-tf
keras-tuner 1.0.1


In [3]:
# One parquet file per (n, s) combination: n in 1..4, s in 0..6 -> 28 files.
# NOTE(review): the original trailing comment on this line was "# 20", which
# does not match the 28 files built here — possibly a previous range() bound; confirm.
files = [
    filename_for(15, 30, 600, 500, n, "inclxx", s, "bars.parquet")
    for n in [1, 2, 3, 4]
    for s in range(7)
]
dfs = [pd.read_parquet(file) for file in files]

# Shuffle all rows with a fixed seed. The tuner later holds out part of the
# rows via `validation_split`; an unseeded shuffle would change which rows end
# up in the validation set on every kernel restart, making runs incomparable.
data = pd.concat(dfs, ignore_index=True).sample(frac=1, random_state=0)

# Rows without any hits get nPN (the column used as class label) forced to 0.
data.loc[data["nHits"] == 0, "nPN"] = 0
display(data)

  Numpy8 = numba.jitclass(spec8)(NumpyIO)
  Numpy32 = numba.jitclass(spec32)(NumpyIO)
  from pandas.core.index import CategoricalIndex, RangeIndex, Index, MultiIndex


Unnamed: 0,nPN,nPP,nPH,nHits,nClus,Edep,0,1,2,3,...,5990,5991,5992,5993,5994,5995,5996,5997,5998,5999
90369,2,2,2,32,13,583,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
211200,4,4,4,59,22,1138,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
192411,3,3,3,29,16,356,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
221785,4,4,4,61,20,1107,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
180530,3,3,3,32,16,655,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
140029,3,3,3,32,13,630,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
58964,1,1,1,9,3,232,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
256688,4,4,4,69,24,1275,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
148577,3,2,2,27,9,520,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
# Feature column groups: three per-event quantities, then the 6000 numbered
# columns split into even-named (cols_e) and odd-named (cols_t) halves
# (presumably energy and time per bar — confirm).
n_numbered_columns = 30 * 100 * 2
cols_tri = ["nHits", "nClus", "Edep"]
cols_e = [str(even) for even in range(0, n_numbered_columns, 2)]
cols_t = [str(odd) for odd in range(1, n_numbered_columns, 2)]


def _as_single_column(frame):
    """Flatten a block of columns into one (n, 1) column so it shares one scale."""
    return frame.values.reshape(-1, 1)


def _scale_with_shared_factor(fitted_scaler, frame):
    """Transform `frame` with a scaler fitted on its flattened values, restoring the shape."""
    scaled_column = fitted_scaler.transform(_as_single_column(frame))
    return scaled_column.reshape(-1, frame.shape[1])


# Each group gets its own copy of the same unfitted MaxAbsScaler template.
scaler = sklearn.preprocessing.MaxAbsScaler()
s_tri = copy.copy(scaler).fit(data[cols_tri])
s_e = copy.copy(scaler).fit(_as_single_column(data[cols_e]))
s_t = copy.copy(scaler).fit(_as_single_column(data[cols_t]))

# The scaled groups are built inline so the large temporaries are released
# as soon as the concatenation is done.
data_scaled = np.concatenate(
    (
        s_tri.transform(data[cols_tri]),
        _scale_with_shared_factor(s_e, data[cols_e]),
        _scale_with_shared_factor(s_t, data[cols_t]),
    ),
    axis=1,
)
display(data_scaled)

array([[0.21192053, 0.25490196, 0.23593687, ..., 0.        , 0.        ,
        0.        ],
       [0.39072848, 0.43137255, 0.46054229, ..., 0.        , 0.        ,
        0.        ],
       [0.19205298, 0.31372549, 0.14407123, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.45695364, 0.47058824, 0.51598543, ..., 0.        , 0.        ,
        0.        ],
       [0.17880795, 0.17647059, 0.21044112, ..., 0.        , 0.        ,
        0.        ],
       [0.0397351 , 0.05882353, 0.0805342 , ..., 0.        , 0.        ,
        0.        ]])

In [5]:
label = "nPN"
max_neutrons = 4
num_classes = max_neutrons + 1  # classes 0 .. max_neutrons

# One-hot encode the labels with an explicit width. Without `num_classes`,
# to_categorical infers the width from the largest label present, which would
# no longer match the model's `num_classes`-wide softmax output if the loaded
# data lacks the top class; a label above max_neutrons now fails here instead
# of silently widening the target.
ys = keras.utils.to_categorical(data[label].values, num_classes=num_classes)
Xs = data_scaled

In [6]:
class Model(kerastuner.HyperModel):
    """Hypermodel for a dense classifier with two ReLU layers and a softmax output.

    The search space covers the width of both hidden layers and the choice of
    optimizer; the loss (categorical cross-entropy) and the metric (accuracy)
    are fixed.
    """

    def __init__(self, num_classes):
        """Create the hypermodel.

        Args:
            num_classes: Number of units in the final softmax layer.
        """
        # Initialise the HyperModel base class so its own attributes exist;
        # the original override skipped this call.
        super().__init__()
        self.num_classes = num_classes

    def build(self, hp):
        """Build and compile one model for the hyperparameters in `hp`.

        Args:
            hp: Hyperparameter container supplied by the tuner; queried for
                "relu1nodes", "relu2nodes" and "optimizer".

        Returns:
            A compiled `keras.models.Sequential` model.
        """
        relu1_units = hp.Int("relu1nodes", min_value=1000, max_value=10000, step=3000)
        relu2_units = hp.Int("relu2nodes", min_value=500, max_value=3000, step=200)

        model = keras.models.Sequential()
        model.add(keras.layers.Dense(units=relu1_units, activation="relu"))
        model.add(keras.layers.Dense(units=relu2_units, activation="relu"))
        model.add(keras.layers.Dense(units=self.num_classes, activation="softmax"))

        optimizer = hp.Choice("optimizer", ["adam", "adagrad", "adadelta", "SGD"])
        loss = keras.losses.CategoricalCrossentropy()

        model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])

        return model

In [11]:
# Hypermodel instance whose search space the tuner explores.
model = Model(num_classes)

# Settings shared by the tuner constructor. `overwrite=True` was left
# disabled in the original cell and is therefore not passed here either.
tuner_options = dict(
    objective="val_accuracy",
    max_trials=40,
    executions_per_trial=1,
    directory="kerastuner",
    project_name="barstri-V1",
)

# kerastuner.tuners.RandomSearch was kept as a commented-out alternative to
# BayesianOptimization in the original cell.
tuner = kerastuner.tuners.BayesianOptimization(model, **tuner_options)
tuner.search_space_summary()

In [None]:
# Run the hyperparameter search: 5 epochs per trial, 10% of the samples held
# out for validation, one log line per epoch.
search_options = dict(epochs=5, validation_split=0.1, verbose=2)
tuner.search(Xs, ys, **search_options)

Train on 252000 samples, validate on 28000 samples
Epoch 1/5
252000/252000 - 104s - loss: 0.6555 - accuracy: 0.7110 - val_loss: 0.6347 - val_accuracy: 0.7199
Epoch 2/5
252000/252000 - 104s - loss: 0.6033 - accuracy: 0.7365 - val_loss: 0.6105 - val_accuracy: 0.7355
Epoch 3/5
252000/252000 - 100s - loss: 0.5372 - accuracy: 0.7695 - val_loss: 0.6394 - val_accuracy: 0.7299
Epoch 4/5
252000/252000 - 99s - loss: 0.3765 - accuracy: 0.8475 - val_loss: 0.8199 - val_accuracy: 0.7127
Epoch 5/5
252000/252000 - 100s - loss: 0.2164 - accuracy: 0.9170 - val_loss: 1.2086 - val_accuracy: 0.6927


Train on 252000 samples, validate on 28000 samples
Epoch 1/5
252000/252000 - 153s - loss: 0.6560 - accuracy: 0.7178 - val_loss: 0.6276 - val_accuracy: 0.7330
Epoch 2/5


In [None]:
# Show the tuner's summary of the search results.
tuner.results_summary()