# Keras Tuner Bars

In [1]:
import sys

import numpy as np
import pandas as pd
import sklearn.preprocessing

sys.path.append("..")
from helpers import filename_for

In [2]:
import kerastuner
import tensorflow
from tensorflow import keras

# Record the library versions this notebook was executed with.
for pkg_name, pkg in (("tensorflow", tensorflow), ("keras", keras), ("keras-tuner", kerastuner)):
    print(pkg_name, pkg.__version__)

tensorflow 2.3.1
keras 2.4.0
keras-tuner 1.0.1


In [3]:
class DataGeneratorBars(keras.utils.Sequence):
    """Batch generator serving scaled features and one-hot labels from "bars.parquet" files.

    Subruns are read in groups ("caches") of ``subrun_cache_size`` subruns. Only one
    group is kept in memory; it is replaced when a batch belonging to another group
    is requested.

    ``config`` is a dict with the following keys:

    * ``distance``, ``doubleplane``, ``energy``, ``erel``: forwarded to ``filename_for``.
    * ``neutrons``: sequence of values forwarded to ``filename_for``; one file is read
      per (neutron value, subrun) pair. Labels are one-hot encoded into
      ``len(neutrons) + 1`` classes.
    * ``subruns``: sequence of subrun identifiers served by this generator.
    * ``subrun_cache_size``: number of subruns loaded into memory at once.
    * ``batch_size``: number of rows per batch.
    * ``cols_e`` / ``cols_t``: lists of column names. All ``cols_e`` columns share one
      ``MaxAbsScaler`` and all ``cols_t`` columns share another.
    * ``label``: name of the label column.
    * ``scaler_subruns`` (optional): subruns used to fit the scalers, default ``range(5)``.
    * ``seed`` (optional): ``random_state`` used when shuffling a loaded cache,
      default ``None`` (non-deterministic shuffle).
    """

    def __init__(self, config):
        """Store the config, derive the batch layout, fit the scalers and load cache 0."""
        self.c = config

        self.labels = []
        self.features = []

        # NOTE: scaler_tri is instantiated but never fitted or applied in this class.
        self.scaler_tri = sklearn.preprocessing.MaxAbsScaler()
        self.scaler_e = sklearn.preprocessing.MaxAbsScaler()
        self.scaler_t = sklearn.preprocessing.MaxAbsScaler()

        # Only the row count of the first file is read; the batch arithmetic below
        # assumes every other file has the same number of rows.
        first_file = self._filename(self.c["neutrons"][0], self.c["subruns"][0])
        rows = len(pd.read_parquet(first_file).index)

        self.batches_per_subrun = (rows * len(self.c["neutrons"])) // self.c["batch_size"]
        self.batches_per_cache = self.batches_per_subrun * self.c["subrun_cache_size"]
        self.len = self.batches_per_subrun * len(self.c["subruns"])

        # Partition the subruns into consecutive groups of at most subrun_cache_size.
        self.cache_subruns = [
            self.c["subruns"][i : i + self.c["subrun_cache_size"]]
            for i in range(0, len(self.c["subruns"]), self.c["subrun_cache_size"])
        ]
        self.current_cache = -1

        print(f"Rows in one file: {rows}")
        print(f"{self.batches_per_subrun} batches per subrun")
        print(f"{self.len} total batches in {self.cache_subruns} caches")

        self.fitscalers()
        self.load(0)

    def __len__(self):
        """Return the total number of batches over all configured subruns."""
        return self.len

    def __getitem__(self, index):
        """Return batch ``index`` as ``(features, labels)``, loading its cache if needed."""
        cacheid = index // self.batches_per_cache
        i = index % (self.batches_per_cache)

        # Swap the in-memory cache only when the batch lives in a different group.
        if cacheid != self.current_cache:
            self.load(cacheid)

        a = i * self.c["batch_size"]
        b = (i + 1) * self.c["batch_size"]

        x = self.features[a:b]
        y = self.labels[a:b]
        return x, y

    def load(self, cacheid):
        """Read, shuffle, scale and one-hot encode the subruns of cache ``cacheid``.

        Replaces ``self.features`` and ``self.labels`` and updates ``self.current_cache``.
        """
        subruns = self.cache_subruns[cacheid]
        print(f"Loading subruns {subruns} for cache {cacheid}")

        # Shuffle all rows of the cache; a configured "seed" makes the order repeatable.
        data = self._read(subruns).sample(frac=1, random_state=self.c.get("seed"))
        # Rows without any hits are assigned label 0.
        data.loc[data["nHits"] == 0, self.c["label"]] = 0

        self.current_cache = cacheid
        self.features = np.concatenate(
            (
                self._scale(self.scaler_e, data, self.c["cols_e"]),
                self._scale(self.scaler_t, data, self.c["cols_t"]),
            ),
            axis=1,
        )
        self.labels = keras.utils.to_categorical(
            data[[self.c["label"]]].values.ravel(), num_classes=len(self.c["neutrons"]) + 1
        )

    def fitscalers(self):
        """Fit the ``cols_e`` and ``cols_t`` scalers on the ``scaler_subruns`` files.

        The subruns used here come from ``config["scaler_subruns"]`` (default
        ``range(5)``), independent of ``config["subruns"]``, so generators built from
        configs that differ only in ``subruns`` are scaled identically.
        """
        data = self._read(self.c.get("scaler_subruns", range(5)))
        self.scaler_e.fit(self._flatten(data, self.c["cols_e"]))
        self.scaler_t.fit(self._flatten(data, self.c["cols_t"]))

    def _filename(self, n, subrun):
        """Return the "bars.parquet" path for one neutron value and one subrun."""
        return filename_for(
            self.c["distance"],
            self.c["doubleplane"],
            self.c["energy"],
            self.c["erel"],
            n,
            "inclxx",
            subrun,
            "bars.parquet",
        )

    def _read(self, subruns):
        """Concatenate the files of every configured neutron value for ``subruns``."""
        files = [self._filename(n, subrun) for n in self.c["neutrons"] for subrun in subruns]
        return pd.concat([pd.read_parquet(file) for file in files], ignore_index=True)

    @staticmethod
    def _flatten(data, columns):
        """Return the values of ``columns`` as a single-column array for a shared scaler."""
        return data[columns].values.reshape(-1, 1)

    def _scale(self, scaler, data, columns):
        """Scale ``columns`` with one shared ``scaler`` and restore the column layout."""
        return scaler.transform(self._flatten(data, columns)).reshape(-1, len(columns))

In [4]:
class Model(kerastuner.HyperModel):
    """Hypermodel for a two-hidden-layer dense softmax classifier.

    The tunable hyperparameters are the widths of the two ReLU layers
    (``relu1nodes`` and ``relu2nodes``).
    """

    def __init__(self, num_classes):
        """Remember the number of output classes.

        Args:
            num_classes: Number of units in the final softmax layer.
        """
        # Initialise the HyperModel base class so its own attributes are set up.
        super().__init__()
        self.num_classes = num_classes

    def build(self, hp):
        """Build and compile a model for the hyperparameter values in ``hp``.

        Args:
            hp: The ``HyperParameters`` object supplied by the tuner.

        Returns:
            A compiled ``keras.models.Sequential`` model using the Adam optimizer,
            categorical cross-entropy loss and the accuracy metric.
        """
        model = keras.models.Sequential()
        model.add(
            keras.layers.Dense(
                units=hp.Int("relu1nodes", min_value=1000, max_value=25000, step=3000),
                activation="relu",
            )
        )
        model.add(
            keras.layers.Dense(
                units=hp.Int("relu2nodes", min_value=500, max_value=3000, step=200),
                activation="relu",
            )
        )
        model.add(keras.layers.Dense(units=self.num_classes, activation="softmax"))

        # The optimizer is fixed; it could be made tunable with e.g.
        # hp.Choice("optimizer", ["adam", "adagrad", "adadelta", "SGD"]).
        optimizer = keras.optimizers.Adam()
        loss = keras.losses.CategoricalCrossentropy()

        model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])

        return model

In [5]:
# Settings consumed by DataGeneratorBars for the training data.
config = dict(
    distance=15,
    doubleplane=30,
    energy=600,
    erel=500,
    neutrons=[1, 2, 3, 4],
    subruns=range(7),  # range(19),
    subrun_cache_size=7,
    batch_size=200,
    # Column names are stringified integers: even numbers for cols_e, odd for cols_t.
    cols_e=list(map(str, range(0, 30 * 100 * 2, 2))),
    cols_t=list(map(str, range(1, 30 * 100 * 2, 2))),
    label="nPN",
)

# The validation settings are a shallow copy that differs only in the subruns.
validation_config = {**config, "subruns": [19]}

In [6]:
# Build the training and validation generators. Each constructor reads one file to
# count rows, fits its scalers and loads its first cache of subruns right away,
# which is what produces the log printed below.
generator = DataGeneratorBars(config)
validation_generator = DataGeneratorBars(validation_config)

Rows in one file: 10000
200 batches per subrun
1400 total batches in [range(0, 7)] caches
Loading subruns range(0, 7) for cache 0
Rows in one file: 10000
200 batches per subrun
200 total batches in [[19]] caches
Loading subruns [19] for cache 0


In [7]:
# One output class per configured neutron value plus one extra class; this matches
# the num_classes used for the one-hot labels in DataGeneratorBars.load.
model = Model(len(config["neutrons"]) + 1)

# Bayesian optimisation over the hypermodel's search space, ranking trials by
# validation accuracy (RandomSearch is kept below as a commented-out alternative).
# tuner = kerastuner.tuners.RandomSearch(
tuner = kerastuner.tuners.BayesianOptimization(
    model,
    objective="val_accuracy",
    max_trials=20,
    executions_per_trial=1,
    # NOTE(review): results are presumably stored under kerastuner/bars; with
    # overwrite left disabled a rerun may pick up earlier trials - confirm.
    directory="kerastuner",
    project_name="bars",
    # overwrite=True
)
tuner.search_space_summary()

In [8]:
# Run the hyperparameter search: 5 epochs per trial, validated on validation_generator.
# shuffle=False presumably keeps batch requests in index order so the generator does
# not have to swap its subrun cache back and forth - confirm.
# NOTE(review): in the log below val_loss rises after the first one or two epochs in
# every trial while training accuracy keeps climbing, i.e. the models overfit quickly.
tuner.search(generator, verbose=2, epochs=5, shuffle=False, validation_data=validation_generator)

Epoch 1/5
1400/1400 - 56s - loss: 0.7971 - accuracy: 0.6343 - val_loss: 0.7434 - val_accuracy: 0.6599
Epoch 2/5
1400/1400 - 50s - loss: 0.6795 - accuracy: 0.6963 - val_loss: 0.7536 - val_accuracy: 0.6638
Epoch 3/5
1400/1400 - 49s - loss: 0.5074 - accuracy: 0.7894 - val_loss: 0.9643 - val_accuracy: 0.6529
Epoch 4/5
1400/1400 - 48s - loss: 0.3136 - accuracy: 0.8801 - val_loss: 1.3203 - val_accuracy: 0.6281
Epoch 5/5
1400/1400 - 47s - loss: 0.2084 - accuracy: 0.9233 - val_loss: 1.6085 - val_accuracy: 0.6424


Epoch 1/5
1400/1400 - 260s - loss: 0.7919 - accuracy: 0.6369 - val_loss: 0.7402 - val_accuracy: 0.6578
Epoch 2/5
1400/1400 - 233s - loss: 0.6536 - accuracy: 0.7104 - val_loss: 0.7642 - val_accuracy: 0.6658
Epoch 3/5
1400/1400 - 183s - loss: 0.4213 - accuracy: 0.8312 - val_loss: 1.0793 - val_accuracy: 0.6566
Epoch 4/5
1400/1400 - 188s - loss: 0.2406 - accuracy: 0.9099 - val_loss: 1.4763 - val_accuracy: 0.6346
Epoch 5/5
1400/1400 - 189s - loss: 0.1456 - accuracy: 0.9484 - val_loss: 1.7148 - val_accuracy: 0.6421


Epoch 1/5
1400/1400 - 209s - loss: 0.7933 - accuracy: 0.6363 - val_loss: 0.7405 - val_accuracy: 0.6588
Epoch 2/5
1400/1400 - 209s - loss: 0.6567 - accuracy: 0.7086 - val_loss: 0.7667 - val_accuracy: 0.6643
Epoch 3/5
1400/1400 - 200s - loss: 0.4252 - accuracy: 0.8298 - val_loss: 1.0832 - val_accuracy: 0.6563
Epoch 4/5
1400/1400 - 216s - loss: 0.2446 - accuracy: 0.9091 - val_loss: 1.4881 - val_accuracy: 0.6398
Epoch 5/5
1400/1400 - 221s - loss: 0.1465 - accuracy: 0.9481 - val_loss: 1.7388 - val_accuracy: 0.6448


Epoch 1/5
1400/1400 - 255s - loss: 0.7921 - accuracy: 0.6364 - val_loss: 0.7413 - val_accuracy: 0.6581
Epoch 2/5
1400/1400 - 229s - loss: 0.6565 - accuracy: 0.7088 - val_loss: 0.7659 - val_accuracy: 0.6660
Epoch 3/5
1400/1400 - 189s - loss: 0.4279 - accuracy: 0.8275 - val_loss: 1.0760 - val_accuracy: 0.6556
Epoch 4/5
1400/1400 - 235s - loss: 0.2461 - accuracy: 0.9078 - val_loss: 1.5084 - val_accuracy: 0.6301
Epoch 5/5
1400/1400 - 246s - loss: 0.1475 - accuracy: 0.9477 - val_loss: 1.7744 - val_accuracy: 0.6420


Epoch 1/5
1400/1400 - 295s - loss: 0.7911 - accuracy: 0.6373 - val_loss: 0.7401 - val_accuracy: 0.6583
Epoch 2/5
1400/1400 - 283s - loss: 0.6519 - accuracy: 0.7112 - val_loss: 0.7684 - val_accuracy: 0.6646
Epoch 3/5
1400/1400 - 265s - loss: 0.4165 - accuracy: 0.8326 - val_loss: 1.0884 - val_accuracy: 0.6583
Epoch 4/5
1400/1400 - 262s - loss: 0.2448 - accuracy: 0.9069 - val_loss: 1.4712 - val_accuracy: 0.6453
Epoch 5/5
1400/1400 - 219s - loss: 0.1439 - accuracy: 0.9489 - val_loss: 1.8077 - val_accuracy: 0.6400


Epoch 1/5
1400/1400 - 174s - loss: 0.7926 - accuracy: 0.6364 - val_loss: 0.7420 - val_accuracy: 0.6581
Epoch 2/5
1400/1400 - 140s - loss: 0.6584 - accuracy: 0.7078 - val_loss: 0.7615 - val_accuracy: 0.6668
Epoch 3/5
1400/1400 - 113s - loss: 0.4333 - accuracy: 0.8251 - val_loss: 1.0507 - val_accuracy: 0.6590
Epoch 4/5
1400/1400 - 113s - loss: 0.2497 - accuracy: 0.9065 - val_loss: 1.5022 - val_accuracy: 0.6304
Epoch 5/5
1400/1400 - 113s - loss: 0.1524 - accuracy: 0.9454 - val_loss: 1.6839 - val_accuracy: 0.6462


Epoch 1/5
1400/1400 - 127s - loss: 0.7899 - accuracy: 0.6379 - val_loss: 0.7398 - val_accuracy: 0.6597
Epoch 2/5
1400/1400 - 127s - loss: 0.6550 - accuracy: 0.7087 - val_loss: 0.7621 - val_accuracy: 0.6661
Epoch 3/5
1400/1400 - 109s - loss: 0.4317 - accuracy: 0.8242 - val_loss: 1.1151 - val_accuracy: 0.6544
Epoch 4/5
1400/1400 - 109s - loss: 0.2663 - accuracy: 0.8978 - val_loss: 1.4485 - val_accuracy: 0.6427
Epoch 5/5
1400/1400 - 109s - loss: 0.1597 - accuracy: 0.9424 - val_loss: 1.7835 - val_accuracy: 0.6475


Epoch 1/5
1400/1400 - 79s - loss: 0.7937 - accuracy: 0.6359 - val_loss: 0.7430 - val_accuracy: 0.6579
Epoch 2/5
1400/1400 - 79s - loss: 0.6642 - accuracy: 0.7043 - val_loss: 0.7579 - val_accuracy: 0.6647
Epoch 3/5
1400/1400 - 70s - loss: 0.4537 - accuracy: 0.8161 - val_loss: 1.0169 - val_accuracy: 0.6571
Epoch 4/5
1400/1400 - 71s - loss: 0.2630 - accuracy: 0.9012 - val_loss: 1.4609 - val_accuracy: 0.6332
Epoch 5/5
1400/1400 - 69s - loss: 0.1612 - accuracy: 0.9425 - val_loss: 1.6952 - val_accuracy: 0.6471


Epoch 1/5
1400/1400 - 55s - loss: 0.7926 - accuracy: 0.6360 - val_loss: 0.7416 - val_accuracy: 0.6599
Epoch 2/5
1400/1400 - 56s - loss: 0.6645 - accuracy: 0.7041 - val_loss: 0.7579 - val_accuracy: 0.6658
Epoch 3/5
1400/1400 - 47s - loss: 0.4638 - accuracy: 0.8103 - val_loss: 1.0722 - val_accuracy: 0.6547
Epoch 4/5
1400/1400 - 47s - loss: 0.2872 - accuracy: 0.8898 - val_loss: 1.3669 - val_accuracy: 0.6410
Epoch 5/5
1400/1400 - 47s - loss: 0.1799 - accuracy: 0.9339 - val_loss: 1.7090 - val_accuracy: 0.6454


Epoch 1/5
1400/1400 - 61s - loss: 0.7947 - accuracy: 0.6359 - val_loss: 0.7412 - val_accuracy: 0.6594
Epoch 2/5
1400/1400 - 61s - loss: 0.6683 - accuracy: 0.7023 - val_loss: 0.7537 - val_accuracy: 0.6668
Epoch 3/5
1400/1400 - 53s - loss: 0.4671 - accuracy: 0.8098 - val_loss: 1.0101 - val_accuracy: 0.6546
Epoch 4/5
1400/1400 - 53s - loss: 0.2747 - accuracy: 0.8958 - val_loss: 1.4046 - val_accuracy: 0.6371
Epoch 5/5
1400/1400 - 53s - loss: 0.1685 - accuracy: 0.9399 - val_loss: 1.6658 - val_accuracy: 0.6414


Epoch 1/5
1400/1400 - 170s - loss: 0.7897 - accuracy: 0.6380 - val_loss: 0.7399 - val_accuracy: 0.6598
Epoch 2/5
1400/1400 - 168s - loss: 0.6537 - accuracy: 0.7094 - val_loss: 0.7626 - val_accuracy: 0.6662
Epoch 3/5
1400/1400 - 148s - loss: 0.4226 - accuracy: 0.8278 - val_loss: 1.1083 - val_accuracy: 0.6576
Epoch 4/5
1400/1400 - 148s - loss: 0.2596 - accuracy: 0.9014 - val_loss: 1.4351 - val_accuracy: 0.6471
Epoch 5/5
1400/1400 - 147s - loss: 0.1550 - accuracy: 0.9436 - val_loss: 1.7923 - val_accuracy: 0.6456


Epoch 1/5
1400/1400 - 82s - loss: 0.7927 - accuracy: 0.6364 - val_loss: 0.7414 - val_accuracy: 0.6584
Epoch 2/5
1400/1400 - 84s - loss: 0.6600 - accuracy: 0.7071 - val_loss: 0.7614 - val_accuracy: 0.6655
Epoch 3/5
1400/1400 - 73s - loss: 0.4406 - accuracy: 0.8216 - val_loss: 1.0456 - val_accuracy: 0.6548
Epoch 4/5
1400/1400 - 72s - loss: 0.2606 - accuracy: 0.9016 - val_loss: 1.4639 - val_accuracy: 0.6356
Epoch 5/5
1400/1400 - 72s - loss: 0.1571 - accuracy: 0.9438 - val_loss: 1.7392 - val_accuracy: 0.6536


Epoch 1/5
1400/1400 - 33s - loss: 0.7953 - accuracy: 0.6351 - val_loss: 0.7454 - val_accuracy: 0.6585
Epoch 2/5
1400/1400 - 33s - loss: 0.6764 - accuracy: 0.6974 - val_loss: 0.7517 - val_accuracy: 0.6669
Epoch 3/5
1400/1400 - 29s - loss: 0.4995 - accuracy: 0.7922 - val_loss: 0.9824 - val_accuracy: 0.6550
Epoch 4/5
1400/1400 - 29s - loss: 0.3138 - accuracy: 0.8787 - val_loss: 1.3742 - val_accuracy: 0.6363
Epoch 5/5
1400/1400 - 29s - loss: 0.2078 - accuracy: 0.9236 - val_loss: 1.6678 - val_accuracy: 0.6410


Epoch 1/5
1400/1400 - 39s - loss: 0.7940 - accuracy: 0.6357 - val_loss: 0.7434 - val_accuracy: 0.6580
Epoch 2/5
1400/1400 - 37s - loss: 0.6734 - accuracy: 0.6992 - val_loss: 0.7546 - val_accuracy: 0.6668
Epoch 3/5
1400/1400 - 32s - loss: 0.4954 - accuracy: 0.7946 - val_loss: 1.0005 - val_accuracy: 0.6571
Epoch 4/5
1400/1400 - 32s - loss: 0.3188 - accuracy: 0.8768 - val_loss: 1.3227 - val_accuracy: 0.6252
Epoch 5/5
1400/1400 - 32s - loss: 0.2117 - accuracy: 0.9210 - val_loss: 1.5785 - val_accuracy: 0.6438


Epoch 1/5
1400/1400 - 139s - loss: 0.7897 - accuracy: 0.6380 - val_loss: 0.7396 - val_accuracy: 0.6590
Epoch 2/5
1400/1400 - 140s - loss: 0.6529 - accuracy: 0.7093 - val_loss: 0.7649 - val_accuracy: 0.6652
Epoch 3/5
1400/1400 - 122s - loss: 0.4218 - accuracy: 0.8296 - val_loss: 1.1122 - val_accuracy: 0.6589
Epoch 4/5
1400/1400 - 122s - loss: 0.2567 - accuracy: 0.9022 - val_loss: 1.3953 - val_accuracy: 0.6502
Epoch 5/5
1400/1400 - 122s - loss: 0.1521 - accuracy: 0.9450 - val_loss: 1.8528 - val_accuracy: 0.6382


Epoch 1/5
1400/1400 - 154s - loss: 0.7893 - accuracy: 0.6382 - val_loss: 0.7395 - val_accuracy: 0.6592
Epoch 2/5
1400/1400 - 191s - loss: 0.6529 - accuracy: 0.7099 - val_loss: 0.7640 - val_accuracy: 0.6668
Epoch 3/5
1400/1400 - 148s - loss: 0.4219 - accuracy: 0.8290 - val_loss: 1.0748 - val_accuracy: 0.6601
Epoch 4/5
1400/1400 - 142s - loss: 0.2639 - accuracy: 0.8992 - val_loss: 1.4276 - val_accuracy: 0.6414
Epoch 5/5
1400/1400 - 140s - loss: 0.1546 - accuracy: 0.9440 - val_loss: 1.8172 - val_accuracy: 0.6489


Epoch 1/5
1400/1400 - 11s - loss: 0.8070 - accuracy: 0.6305 - val_loss: 0.7548 - val_accuracy: 0.6542
Epoch 2/5
1400/1400 - 12s - loss: 0.7166 - accuracy: 0.6754 - val_loss: 0.7507 - val_accuracy: 0.6559
Epoch 3/5
1400/1400 - 10s - loss: 0.6289 - accuracy: 0.7243 - val_loss: 0.7956 - val_accuracy: 0.6539
Epoch 4/5
1400/1400 - 10s - loss: 0.4987 - accuracy: 0.7936 - val_loss: 0.9410 - val_accuracy: 0.6484
Epoch 5/5
1400/1400 - 10s - loss: 0.3693 - accuracy: 0.8543 - val_loss: 1.2134 - val_accuracy: 0.6275


Epoch 1/5
1400/1400 - 13s - loss: 0.8030 - accuracy: 0.6313 - val_loss: 0.7568 - val_accuracy: 0.6517
Epoch 2/5
1400/1400 - 13s - loss: 0.7052 - accuracy: 0.6818 - val_loss: 0.7526 - val_accuracy: 0.6594
Epoch 3/5
1400/1400 - 11s - loss: 0.6001 - accuracy: 0.7396 - val_loss: 0.8506 - val_accuracy: 0.6525
Epoch 4/5
1400/1400 - 11s - loss: 0.4531 - accuracy: 0.8151 - val_loss: 1.0973 - val_accuracy: 0.6395
Epoch 5/5
1400/1400 - 12s - loss: 0.3338 - accuracy: 0.8679 - val_loss: 1.3641 - val_accuracy: 0.6248


Epoch 1/5
1400/1400 - 52s - loss: 0.7943 - accuracy: 0.6357 - val_loss: 0.7423 - val_accuracy: 0.6590
Epoch 2/5
1400/1400 - 52s - loss: 0.6679 - accuracy: 0.7026 - val_loss: 0.7538 - val_accuracy: 0.6676
Epoch 3/5
1400/1400 - 44s - loss: 0.4727 - accuracy: 0.8065 - val_loss: 1.0204 - val_accuracy: 0.6559
Epoch 4/5
1400/1400 - 44s - loss: 0.2881 - accuracy: 0.8898 - val_loss: 1.3695 - val_accuracy: 0.6380
Epoch 5/5
1400/1400 - 44s - loss: 0.1796 - accuracy: 0.9347 - val_loss: 1.6351 - val_accuracy: 0.6463


Epoch 1/5
1400/1400 - 81s - loss: 0.7916 - accuracy: 0.6368 - val_loss: 0.7410 - val_accuracy: 0.6590
Epoch 2/5
1400/1400 - 80s - loss: 0.6609 - accuracy: 0.7052 - val_loss: 0.7548 - val_accuracy: 0.6683
Epoch 3/5
1400/1400 - 71s - loss: 0.4487 - accuracy: 0.8174 - val_loss: 1.0579 - val_accuracy: 0.6571
Epoch 4/5
1400/1400 - 71s - loss: 0.2780 - accuracy: 0.8939 - val_loss: 1.3391 - val_accuracy: 0.6424
Epoch 5/5
1400/1400 - 71s - loss: 0.1668 - accuracy: 0.9393 - val_loss: 1.7600 - val_accuracy: 0.6500


INFO:tensorflow:Oracle triggered exit


In [9]:
# Print the tuner's summary of the finished search.
tuner.results_summary()