# Keras Tuner BarsTri

In [1]:
import sys

import numpy as np
import pandas as pd
import sklearn.preprocessing

sys.path.append("..")
from helpers import filename_for

In [2]:
import kerastuner
import tensorflow
from tensorflow import keras

# Report the versions of the libraries this notebook was executed with
for _label, _module in (("tensorflow", tensorflow), ("keras", keras), ("keras-tuner", kerastuner)):
    print(_label, _module.__version__)

tensorflow 2.3.1
keras 2.4.0
keras-tuner 1.0.1


In [3]:
class DataGeneratorBars(keras.utils.Sequence):
    """Keras Sequence that serves scaled feature batches and one-hot labels from parquet files.

    Subruns are grouped into caches of ``subrun_cache_size`` subruns each. One cache is
    held in memory at a time; its rows are shuffled on load, and it is replaced whenever a
    batch belonging to a different cache is requested.

    Config keys read: ``distance``, ``doubleplane``, ``energy``, ``erel`` (forwarded to
    ``filename_for``), ``neutrons``, ``subruns``, ``subrun_cache_size``, ``batch_size``,
    ``cols_tri``, ``cols_e``, ``cols_t``, ``label`` and, optionally, ``scaler_subruns``
    (the subruns used to fit the scalers; defaults to ``range(5)``).
    """

    def __init__(self, config):
        """Size the sequence from the first file, fit the scalers and load the first cache.

        Args:
            config: dict with the keys listed in the class docstring.
        """
        # Harmless on a base class without its own __init__, required by those that have one
        super().__init__()
        self.c = config

        self.labels = []
        self.features = []

        self.scaler_tri = sklearn.preprocessing.MaxAbsScaler()
        self.scaler_e = sklearn.preprocessing.MaxAbsScaler()
        self.scaler_t = sklearn.preprocessing.MaxAbsScaler()

        # Only the row count is needed here, so read a single column instead of the
        # whole file; every file is assumed to have as many rows as this first one.
        first_file = self._files([self.c["neutrons"][0]], [self.c["subruns"][0]])[0]
        rows = len(pd.read_parquet(first_file, columns=[self.c["label"]]).index)

        # A subrun contributes one file per neutron entry; leftover rows that do not
        # fill a whole batch are dropped by the integer division.
        self.batches_per_subrun = (rows * len(self.c["neutrons"])) // self.c["batch_size"]
        self.batches_per_cache = self.batches_per_subrun * self.c["subrun_cache_size"]
        self.len = self.batches_per_subrun * len(self.c["subruns"])

        # Consecutive groups of at most `subrun_cache_size` subruns; one group per cache
        self.cache_subruns = [
            self.c["subruns"][i : i + self.c["subrun_cache_size"]]
            for i in range(0, len(self.c["subruns"]), self.c["subrun_cache_size"])
        ]
        self.current_cache = -1

        print(f"Rows in one file: {rows}")
        print(f"{self.batches_per_subrun} batches per subrun")
        print(f"{self.len} total batches in {self.cache_subruns} caches")

        self.fitscalers()
        self.load(0)

    def __len__(self):
        """Return the total number of batches over all configured subruns."""
        return self.len

    def __getitem__(self, index):
        """Return batch ``index`` as ``(features, labels)``, loading its cache if necessary.

        Args:
            index: global batch index in ``range(len(self))``.

        Returns:
            Tuple ``(x, y)`` holding the slices of the cached feature and label arrays
            that belong to this batch.
        """
        cacheid = index // self.batches_per_cache
        i = index % self.batches_per_cache

        # Swap the in-memory cache only when the batch lives in a different one
        if cacheid != self.current_cache:
            self.load(cacheid)

        a = i * self.c["batch_size"]
        b = (i + 1) * self.c["batch_size"]

        x = self.features[a:b]
        y = self.labels[a:b]
        return x, y

    def _files(self, neutrons, subruns):
        """Return the ``bars.parquet`` paths for every (neutron, subrun) pair, neutron-major."""
        return [
            filename_for(
                self.c["distance"],
                self.c["doubleplane"],
                self.c["energy"],
                self.c["erel"],
                n,
                "inclxx",
                subrun,
                "bars.parquet",
            )
            for n in neutrons
            for subrun in subruns
        ]

    def load(self, cacheid):
        """Read, shuffle and scale the subruns of cache ``cacheid`` into memory.

        Replaces ``self.features`` and ``self.labels`` and records ``cacheid`` as the
        current cache once both arrays have been built.

        Args:
            cacheid: index into ``self.cache_subruns``.
        """
        subruns = self.cache_subruns[cacheid]
        print(f"Loading subruns {subruns} for cache {cacheid}")

        files = self._files(self.c["neutrons"], subruns)
        # sample(frac=1) shuffles the rows of the concatenated cache
        data = pd.concat([pd.read_parquet(file) for file in files], ignore_index=True).sample(frac=1)
        # Rows without any hit get label 0
        data.loc[data["nHits"] == 0, self.c["label"]] = 0

        # scaler_e and scaler_t were fitted on a single flattened column, so the
        # corresponding blocks are flattened for the transform and reshaped back.
        features = np.concatenate(
            (
                self.scaler_tri.transform(data[self.c["cols_tri"]]),
                self.scaler_e.transform(data[self.c["cols_e"]].values.reshape(-1, 1)).reshape(
                    -1, len(self.c["cols_e"])
                ),
                self.scaler_t.transform(data[self.c["cols_t"]].values.reshape(-1, 1)).reshape(
                    -1, len(self.c["cols_t"])
                ),
            ),
            axis=1,
        )
        # One class per neutron entry plus class 0
        labels = keras.utils.to_categorical(
            data[[self.c["label"]]].values.ravel(), num_classes=len(self.c["neutrons"]) + 1
        )
        del data

        # Publish the new cache only after it has been built completely, so a failed
        # load never leaves `current_cache` pointing at stale arrays.
        self.features = features
        self.labels = labels
        self.current_cache = cacheid

    def fitscalers(self):
        """Fit the three scalers on the scaler subruns of every configured neutron entry.

        The subruns come from the optional ``scaler_subruns`` config key and default to
        ``range(5)``, independent of ``subruns``. ``scaler_tri`` is fitted per column;
        ``scaler_e`` and ``scaler_t`` are each fitted on all of their columns flattened
        into one, so a single scale is shared by every ``cols_e`` (resp. ``cols_t``) column.
        """
        subruns = self.c.get("scaler_subruns", range(5))
        files = self._files(self.c["neutrons"], subruns)
        data = pd.concat([pd.read_parquet(file) for file in files], ignore_index=True)
        self.scaler_tri.fit(data[self.c["cols_tri"]])
        self.scaler_e.fit(data[self.c["cols_e"]].values.reshape(-1, 1))
        self.scaler_t.fit(data[self.c["cols_t"]].values.reshape(-1, 1))
        del data

In [4]:
class Model(kerastuner.HyperModel):
    """Hypermodel for a dense softmax classifier whose two hidden-layer widths are tuned."""

    def __init__(self, num_classes):
        """Store the size of the output layer.

        Args:
            num_classes: number of units in the final softmax layer.
        """
        # Run the base initialiser so the attributes HyperModel itself relies on are set up
        super().__init__()
        self.num_classes = num_classes

    def build(self, hp):
        """Build and compile one candidate model.

        Args:
            hp: hyperparameter container supplied by the tuner; the integer
                hyperparameters ``relu1nodes`` and ``relu2nodes`` are registered on it.

        Returns:
            A compiled ``keras.models.Sequential`` with two ReLU hidden layers and a
            softmax output, using categorical cross-entropy, Adam and the accuracy metric.
        """
        model = keras.models.Sequential()
        model.add(
            keras.layers.Dense(
                units=hp.Int("relu1nodes", min_value=100, max_value=20000, step=1000),
                activation="relu",
            )
        )
        model.add(
            keras.layers.Dense(
                units=hp.Int("relu2nodes", min_value=10, max_value=3000, step=100),
                activation="relu",
            )
        )
        model.add(keras.layers.Dense(units=self.num_classes, activation="softmax"))

        loss = keras.losses.CategoricalCrossentropy()
        optimizer = keras.optimizers.Adam()
        model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])

        return model

In [5]:
# Data-set selection and generator settings used for training
config = dict(
    distance=15,
    doubleplane=30,
    energy=600,
    erel=500,
    neutrons=[1, 2, 3, 4],
    subruns=range(7),  # alternative: range(19)
    subrun_cache_size=7,
    batch_size=200,
    cols_tri=["nHits", "nClus", "Edep"],
    # Even-numbered column names go to cols_e, odd-numbered ones to cols_t
    cols_e=list(map(str, range(0, 30 * 100 * 2, 2))),
    cols_t=list(map(str, range(1, 30 * 100 * 2, 2))),
    label="nPN",
)

# Shallow copy of the training settings that reads subrun 19 only
validation_config = {**config, "subruns": [19]}

In [6]:
# Output layer size: one class per neutron entry plus class 0
model = Model(num_classes=1 + len(config["neutrons"]))

# Each generator fits its scalers and loads its first cache on construction
generator = DataGeneratorBars(config=config)
validation_generator = DataGeneratorBars(config=validation_config)

Rows in one file: 10000
200 batches per subrun
1400 total batches in [range(0, 7)] caches
Loading subruns range(0, 7) for cache 0
Rows in one file: 10000
200 batches per subrun
200 total batches in [[19]] caches
Loading subruns [19] for cache 0


In [7]:
# Random search over the hypermodel's search space, ranked by validation accuracy.
# Alternative tuner class: kerastuner.tuners.BayesianOptimization
tuner = kerastuner.tuners.RandomSearch(
    hypermodel=model,
    objective="val_accuracy",
    max_trials=25,
    executions_per_trial=1,
    project_name="barstri-tf231",
    directory="kerastuner",
    overwrite=True,
)
tuner.search_space_summary()

In [8]:
# shuffle=False: DataGeneratorBars reloads its cache whenever the requested batch
# belongs to a different cache, and already shuffles the rows of a cache on load.
tuner.search(
    generator,
    validation_data=validation_generator,
    epochs=3,
    shuffle=False,
    verbose=2,
)

Epoch 1/3
1400/1400 - 64s - loss: 0.6630 - accuracy: 0.7092 - val_loss: 0.6295 - val_accuracy: 0.7225
Epoch 2/3
1400/1400 - 75s - loss: 0.5948 - accuracy: 0.7414 - val_loss: 0.6241 - val_accuracy: 0.7274
Epoch 3/3
1400/1400 - 68s - loss: 0.4950 - accuracy: 0.7909 - val_loss: 0.7025 - val_accuracy: 0.7160


Epoch 1/3
1400/1400 - 95s - loss: 0.6674 - accuracy: 0.7077 - val_loss: 0.6342 - val_accuracy: 0.7208
Epoch 2/3
1400/1400 - 116s - loss: 0.6016 - accuracy: 0.7378 - val_loss: 0.6268 - val_accuracy: 0.7258
Epoch 3/3
1400/1400 - 87s - loss: 0.5170 - accuracy: 0.7817 - val_loss: 0.6819 - val_accuracy: 0.7171


Epoch 1/3
1400/1400 - 204s - loss: 0.6639 - accuracy: 0.7086 - val_loss: 0.6308 - val_accuracy: 0.7222
Epoch 2/3
1400/1400 - 198s - loss: 0.5881 - accuracy: 0.7454 - val_loss: 0.6335 - val_accuracy: 0.7240
Epoch 3/3
1400/1400 - 161s - loss: 0.4700 - accuracy: 0.8044 - val_loss: 0.7514 - val_accuracy: 0.7056


Epoch 1/3
1400/1400 - 68s - loss: 0.6650 - accuracy: 0.7088 - val_loss: 0.6292 - val_accuracy: 0.7236
Epoch 2/3
1400/1400 - 53s - loss: 0.5996 - accuracy: 0.7386 - val_loss: 0.6250 - val_accuracy: 0.7285
Epoch 3/3
1400/1400 - 60s - loss: 0.5134 - accuracy: 0.7829 - val_loss: 0.6952 - val_accuracy: 0.7158


Epoch 1/3
1400/1400 - 159s - loss: 0.6631 - accuracy: 0.7090 - val_loss: 0.6303 - val_accuracy: 0.7217
Epoch 2/3
1400/1400 - 150s - loss: 0.5867 - accuracy: 0.7461 - val_loss: 0.6320 - val_accuracy: 0.7247
Epoch 3/3
1400/1400 - 122s - loss: 0.4634 - accuracy: 0.8069 - val_loss: 0.7729 - val_accuracy: 0.7011


Epoch 1/3
1400/1400 - 53s - loss: 0.6668 - accuracy: 0.7076 - val_loss: 0.6289 - val_accuracy: 0.7247
Epoch 2/3
1400/1400 - 48s - loss: 0.6017 - accuracy: 0.7373 - val_loss: 0.6228 - val_accuracy: 0.7283
Epoch 3/3
1400/1400 - 74s - loss: 0.5255 - accuracy: 0.7760 - val_loss: 0.6789 - val_accuracy: 0.7182


Epoch 1/3
1400/1400 - 43s - loss: 0.6758 - accuracy: 0.7062 - val_loss: 0.6260 - val_accuracy: 0.7252
Epoch 2/3
1400/1400 - 40s - loss: 0.6157 - accuracy: 0.7312 - val_loss: 0.6175 - val_accuracy: 0.7289
Epoch 3/3
1400/1400 - 37s - loss: 0.5869 - accuracy: 0.7445 - val_loss: 0.6198 - val_accuracy: 0.7293


Epoch 1/3
1400/1400 - 172s - loss: 0.6621 - accuracy: 0.7088 - val_loss: 0.6307 - val_accuracy: 0.7222
Epoch 2/3
1400/1400 - 181s - loss: 0.5843 - accuracy: 0.7470 - val_loss: 0.6303 - val_accuracy: 0.7254
Epoch 3/3
1400/1400 - 148s - loss: 0.4558 - accuracy: 0.8093 - val_loss: 0.7886 - val_accuracy: 0.7071


Epoch 1/3
1400/1400 - 43s - loss: 0.6668 - accuracy: 0.7082 - val_loss: 0.6290 - val_accuracy: 0.7230
Epoch 2/3
1400/1400 - 37s - loss: 0.6060 - accuracy: 0.7353 - val_loss: 0.6182 - val_accuracy: 0.7300
Epoch 3/3
1400/1400 - 41s - loss: 0.5490 - accuracy: 0.7635 - val_loss: 0.6488 - val_accuracy: 0.7231


Epoch 1/3
1400/1400 - 78s - loss: 0.6629 - accuracy: 0.7092 - val_loss: 0.6308 - val_accuracy: 0.7226
Epoch 2/3
1400/1400 - 75s - loss: 0.5921 - accuracy: 0.7426 - val_loss: 0.6263 - val_accuracy: 0.7278
Epoch 3/3
1400/1400 - 75s - loss: 0.4829 - accuracy: 0.7969 - val_loss: 0.7337 - val_accuracy: 0.7111


Epoch 1/3
1400/1400 - 48s - loss: 0.6666 - accuracy: 0.7082 - val_loss: 0.6270 - val_accuracy: 0.7253
Epoch 2/3
1400/1400 - 42s - loss: 0.6062 - accuracy: 0.7353 - val_loss: 0.6183 - val_accuracy: 0.7300
Epoch 3/3
1400/1400 - 36s - loss: 0.5516 - accuracy: 0.7620 - val_loss: 0.6404 - val_accuracy: 0.7216


Epoch 1/3
1400/1400 - 200s - loss: 0.6612 - accuracy: 0.7093 - val_loss: 0.6314 - val_accuracy: 0.7222
Epoch 2/3
1400/1400 - 171s - loss: 0.5848 - accuracy: 0.7469 - val_loss: 0.6296 - val_accuracy: 0.7244
Epoch 3/3
1400/1400 - 168s - loss: 0.4613 - accuracy: 0.8065 - val_loss: 0.7671 - val_accuracy: 0.7083


Epoch 1/3
1400/1400 - 36s - loss: 0.6655 - accuracy: 0.7087 - val_loss: 0.6283 - val_accuracy: 0.7242
Epoch 2/3
1400/1400 - 40s - loss: 0.6049 - accuracy: 0.7361 - val_loss: 0.6193 - val_accuracy: 0.7301
Epoch 3/3
1400/1400 - 26s - loss: 0.5446 - accuracy: 0.7656 - val_loss: 0.6506 - val_accuracy: 0.7229


Epoch 1/3
1400/1400 - 65s - loss: 0.6753 - accuracy: 0.7054 - val_loss: 0.6269 - val_accuracy: 0.7247
Epoch 2/3
1400/1400 - 36s - loss: 0.6128 - accuracy: 0.7328 - val_loss: 0.6210 - val_accuracy: 0.7284
Epoch 3/3
1400/1400 - 45s - loss: 0.5707 - accuracy: 0.7544 - val_loss: 0.6337 - val_accuracy: 0.7224


Epoch 1/3
1400/1400 - 60s - loss: 0.6634 - accuracy: 0.7093 - val_loss: 0.6297 - val_accuracy: 0.7222
Epoch 2/3
1400/1400 - 64s - loss: 0.5912 - accuracy: 0.7431 - val_loss: 0.6263 - val_accuracy: 0.7276
Epoch 3/3
1400/1400 - 50s - loss: 0.4802 - accuracy: 0.7989 - val_loss: 0.7311 - val_accuracy: 0.7112


Epoch 1/3
1400/1400 - 42s - loss: 0.6656 - accuracy: 0.7088 - val_loss: 0.6280 - val_accuracy: 0.7247
Epoch 2/3
1400/1400 - 50s - loss: 0.6045 - accuracy: 0.7361 - val_loss: 0.6197 - val_accuracy: 0.7297
Epoch 3/3
1400/1400 - 54s - loss: 0.5435 - accuracy: 0.7663 - val_loss: 0.6541 - val_accuracy: 0.7225


Epoch 1/3
1400/1400 - 55s - loss: 0.6649 - accuracy: 0.7087 - val_loss: 0.6284 - val_accuracy: 0.7234
Epoch 2/3
1400/1400 - 56s - loss: 0.6010 - accuracy: 0.7379 - val_loss: 0.6210 - val_accuracy: 0.7294
Epoch 3/3
1400/1400 - 46s - loss: 0.5201 - accuracy: 0.7791 - val_loss: 0.6711 - val_accuracy: 0.7188


Epoch 1/3
1400/1400 - 50s - loss: 0.6664 - accuracy: 0.7081 - val_loss: 0.6294 - val_accuracy: 0.7235
Epoch 2/3
1400/1400 - 43s - loss: 0.6050 - accuracy: 0.7359 - val_loss: 0.6201 - val_accuracy: 0.7297
Epoch 3/3
1400/1400 - 59s - loss: 0.5418 - accuracy: 0.7679 - val_loss: 0.6536 - val_accuracy: 0.7240


Epoch 1/3
1400/1400 - 109s - loss: 0.6627 - accuracy: 0.7092 - val_loss: 0.6301 - val_accuracy: 0.7222
Epoch 2/3
1400/1400 - 96s - loss: 0.5865 - accuracy: 0.7460 - val_loss: 0.6329 - val_accuracy: 0.7228
Epoch 3/3
1400/1400 - 84s - loss: 0.4643 - accuracy: 0.8063 - val_loss: 0.7698 - val_accuracy: 0.7118


Epoch 1/3
1400/1400 - 37s - loss: 0.6671 - accuracy: 0.7087 - val_loss: 0.6268 - val_accuracy: 0.7250
Epoch 2/3
1400/1400 - 27s - loss: 0.6083 - accuracy: 0.7337 - val_loss: 0.6186 - val_accuracy: 0.7289
Epoch 3/3
1400/1400 - 32s - loss: 0.5603 - accuracy: 0.7567 - val_loss: 0.6406 - val_accuracy: 0.7230


Epoch 1/3
1400/1400 - 54s - loss: 0.6660 - accuracy: 0.7088 - val_loss: 0.6259 - val_accuracy: 0.7262
Epoch 2/3
1400/1400 - 55s - loss: 0.6063 - accuracy: 0.7353 - val_loss: 0.6185 - val_accuracy: 0.7287
Epoch 3/3
1400/1400 - 56s - loss: 0.5501 - accuracy: 0.7630 - val_loss: 0.6455 - val_accuracy: 0.7219


Epoch 1/3
1400/1400 - 69s - loss: 0.6639 - accuracy: 0.7091 - val_loss: 0.6292 - val_accuracy: 0.7229
Epoch 2/3
1400/1400 - 59s - loss: 0.5964 - accuracy: 0.7401 - val_loss: 0.6227 - val_accuracy: 0.7284
Epoch 3/3
1400/1400 - 65s - loss: 0.4995 - accuracy: 0.7893 - val_loss: 0.7028 - val_accuracy: 0.7127


Epoch 1/3
1400/1400 - 66s - loss: 0.6632 - accuracy: 0.7093 - val_loss: 0.6296 - val_accuracy: 0.7224
Epoch 2/3
1400/1400 - 72s - loss: 0.5938 - accuracy: 0.7416 - val_loss: 0.6239 - val_accuracy: 0.7273
Epoch 3/3
1400/1400 - 65s - loss: 0.4885 - accuracy: 0.7941 - val_loss: 0.7184 - val_accuracy: 0.7158


Epoch 1/3
1400/1400 - 210s - loss: 0.6613 - accuracy: 0.7091 - val_loss: 0.6299 - val_accuracy: 0.7222
Epoch 2/3
1400/1400 - 151s - loss: 0.5867 - accuracy: 0.7457 - val_loss: 0.6294 - val_accuracy: 0.7249
Epoch 3/3
1400/1400 - 135s - loss: 0.4655 - accuracy: 0.8053 - val_loss: 0.7795 - val_accuracy: 0.7110


Epoch 1/3
1400/1400 - 98s - loss: 0.6663 - accuracy: 0.7083 - val_loss: 0.6336 - val_accuracy: 0.7208
Epoch 2/3
1400/1400 - 88s - loss: 0.6028 - accuracy: 0.7368 - val_loss: 0.6289 - val_accuracy: 0.7245
Epoch 3/3
1400/1400 - 98s - loss: 0.5223 - accuracy: 0.7792 - val_loss: 0.6761 - val_accuracy: 0.7165


INFO:tensorflow:Oracle triggered exit


In [9]:
# Print the tuner's summary of the completed trials
tuner.results_summary()