# Keras Tuner BarsTri

In [1]:
import sys
import time

import numpy as np
import pandas as pd
import sklearn.preprocessing

# Add the parent directory (relative to the current working directory) to the
# import path so that the project-local `helpers` module can be imported.
# NOTE(review): presumably `helpers.py` lives one level above this notebook — confirm.
sys.path.append("..")
from helpers import filename_for

In [2]:
import kerastuner
import tensorflow
from tensorflow import keras

# Record the library versions this notebook was executed with.
print("tensorflow", tensorflow.__version__)
print("keras", keras.__version__)
print("keras-tuner", kerastuner.__version__)

tensorflow 2.1.0
keras 2.2.4-tf
keras-tuner 1.0.1


In [3]:
class DataGeneratorBars(keras.utils.Sequence):
    """Keras ``Sequence`` serving scaled batches from per-subrun ``bars.parquet`` files.

    Subruns are grouped into caches of ``subrun_cache_size`` subruns; only one
    cache is held in memory at a time and it is (re)loaded and shuffled whenever
    a batch from a different cache is requested.

    ``config`` is a mapping from which the following keys are read:

    * ``distance``, ``doubleplane``, ``energy``, ``erel``: passed through to
      ``filename_for`` unchanged.
    * ``neutrons``: values passed to ``filename_for``; one file per value is read
      for each subrun. Labels are one-hot encoded with ``len(neutrons) + 1`` classes.
    * ``subruns``: sliceable sequence of subrun ids to serve.
    * ``subrun_cache_size``: number of subruns per in-memory cache.
    * ``batch_size``: number of rows per batch.
    * ``cols_tri``: feature columns scaled per column.
    * ``cols_e`` / ``cols_t``: feature columns scaled with one common factor per group.
    * ``label``: name of the label column.
    * ``scaler_subruns`` (optional): subruns used to fit the scalers; defaults to
      ``range(5)``, independent of ``subruns``.
    * ``seed`` (optional): ``random_state`` for the per-cache shuffle; defaults
      to ``None`` (non-reproducible shuffle).

    The batch count is derived from the row count of the first file only, i.e.
    it assumes all files contain the same number of rows.
    """

    def __init__(self, config):
        """Compute batch bookkeeping, fit the scalers and load the first cache."""
        self.c = config

        self.labels = []
        self.features = []

        self.scaler_tri = sklearn.preprocessing.MaxAbsScaler()
        self.scaler_e = sklearn.preprocessing.MaxAbsScaler()
        self.scaler_t = sklearn.preprocessing.MaxAbsScaler()

        # Only the row count is needed here. Reading a single column avoids
        # materialising the complete (very wide) table just to call len() on it.
        # "nHits" is used because load() requires that column in every file.
        first_file = self._filename(self.c["neutrons"][0], self.c["subruns"][0])
        rows = len(pd.read_parquet(first_file, columns=["nHits"]).index)

        self.batches_per_subrun = (rows * len(self.c["neutrons"])) // self.c["batch_size"]
        self.batches_per_cache = self.batches_per_subrun * self.c["subrun_cache_size"]
        self.len = self.batches_per_subrun * len(self.c["subruns"])

        # Consecutive chunks of at most `subrun_cache_size` subruns each.
        self.cache_subruns = [
            self.c["subruns"][i : i + self.c["subrun_cache_size"]]
            for i in range(0, len(self.c["subruns"]), self.c["subrun_cache_size"])
        ]
        self.current_cache = -1

        print(f"Rows in one file: {rows}")
        print(f"{self.batches_per_subrun} batches per subrun")
        print(f"{self.len} total batches in {self.cache_subruns} caches")

        self.fitscalers()
        self.load(0)

    def __len__(self):
        """Return the total number of batches over all configured subruns."""
        return self.len

    def __getitem__(self, index):
        """Return batch ``index`` as ``(features, labels)``.

        The cache containing the batch is loaded first if it is not the one
        currently in memory.
        """
        cacheid = index // self.batches_per_cache
        i = index % (self.batches_per_cache)
        # print(f"{index} -> c{cacheid}-i{i}")

        if cacheid != self.current_cache:
            self.load(cacheid)

        a = i * self.c["batch_size"]
        b = (i + 1) * self.c["batch_size"]

        x = self.features[a:b]
        y = self.labels[a:b]
        return x, y

    def _filename(self, neutrons, subrun):
        """Return the ``bars.parquet`` file name for one ``neutrons`` value and subrun."""
        return filename_for(
            self.c["distance"],
            self.c["doubleplane"],
            self.c["energy"],
            self.c["erel"],
            neutrons,
            "inclxx",
            subrun,
            "bars.parquet",
        )

    def _files_for(self, subruns):
        """Return the file names of every configured ``neutrons`` value for ``subruns``."""
        return [self._filename(n, subrun) for n in self.c["neutrons"] for subrun in subruns]

    def load(self, cacheid):
        """Read, shuffle and scale the subruns of cache ``cacheid`` into memory.

        Replaces ``self.features`` and ``self.labels`` and records ``cacheid``
        as the current cache.
        """
        subruns = self.cache_subruns[cacheid]
        print(f"Loading subruns {subruns} for cache {cacheid}")

        files = self._files_for(subruns)
        # Shuffle all rows of the cache; `seed` is optional and defaults to None,
        # which keeps the shuffle non-deterministic.
        data = pd.concat([pd.read_parquet(file) for file in files], ignore_index=True).sample(
            frac=1, random_state=self.c.get("seed")
        )
        # Rows without any hit get label 0.
        data.loc[data["nHits"] == 0, self.c["label"]] = 0

        self.current_cache = cacheid
        # cols_e / cols_t are flattened to a single column before scaling so that
        # each group shares one scale factor, then reshaped back to one column
        # per original feature.
        self.features = np.concatenate(
            (
                self.scaler_tri.transform(data[self.c["cols_tri"]]),
                self.scaler_e.transform(data[self.c["cols_e"]].values.reshape(-1, 1)).reshape(
                    -1, len(self.c["cols_e"])
                ),
                self.scaler_t.transform(data[self.c["cols_t"]].values.reshape(-1, 1)).reshape(
                    -1, len(self.c["cols_t"])
                ),
            ),
            axis=1,
        )
        self.labels = keras.utils.to_categorical(
            data[[self.c["label"]]].values.ravel(), num_classes=len(self.c["neutrons"]) + 1
        )

    def fitscalers(self):
        """Fit the three ``MaxAbsScaler`` instances.

        The scalers are fitted on ``config["scaler_subruns"]`` when given and on
        subruns ``range(5)`` otherwise, regardless of ``config["subruns"]``, so
        generators built from configs that differ only in ``subruns`` end up
        with scalers fitted on the same files.
        """
        subruns = self.c.get("scaler_subruns", range(5))
        files = self._files_for(subruns)
        data = pd.concat([pd.read_parquet(file) for file in files], ignore_index=True)
        self.scaler_tri.fit(data[self.c["cols_tri"]])
        # One common scale factor for all cols_e columns, and one for all cols_t columns.
        self.scaler_e.fit(data[self.c["cols_e"]].values.reshape(-1, 1))
        self.scaler_t.fit(data[self.c["cols_t"]].values.reshape(-1, 1))

In [4]:
class Model(kerastuner.HyperModel):
    """Hypermodel for a dense softmax classifier with two tunable ReLU layers.

    Tuned hyperparameters:

    * ``relu1nodes``: units of the first hidden layer (1000 to 10000, step 3000).
    * ``relu2nodes``: units of the second hidden layer (500 to 3000, step 200).
    * ``optimizer``: one of ``adam``, ``adagrad``, ``adadelta``, ``SGD``.
    """

    def __init__(self, num_classes):
        """Store the number of output classes (units of the softmax layer)."""
        # Run the base-class initialiser so the attributes it sets up exist on
        # this instance as well.
        super().__init__()
        self.num_classes = num_classes

    def build(self, hp):
        """Build and compile a model for the hyperparameter set ``hp``.

        Returns a compiled ``Sequential`` model using categorical cross-entropy
        as loss and reporting accuracy. No input shape is declared on the model.
        """
        model = keras.models.Sequential()
        model.add(
            keras.layers.Dense(
                units=hp.Int("relu1nodes", min_value=1000, max_value=10000, step=3000),
                activation="relu",
            )
        )
        model.add(
            keras.layers.Dense(
                units=hp.Int("relu2nodes", min_value=500, max_value=3000, step=200),
                activation="relu",
            )
        )
        model.add(keras.layers.Dense(units=self.num_classes, activation="softmax"))

        optimizer = hp.Choice("optimizer", ["adam", "adagrad", "adadelta", "SGD"])
        loss = keras.losses.CategoricalCrossentropy()

        model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])

        return model

In [5]:
# Settings consumed by DataGeneratorBars and Model.
# cols_e holds the even-numbered column names "0", "2", ..., "5998" and
# cols_t the odd-numbered ones "1", "3", ..., "5999".
config = dict(
    distance=15,
    doubleplane=30,
    energy=600,
    erel=500,
    neutrons=[1, 2, 3, 4],
    subruns=range(7),  # range(19),
    subrun_cache_size=7,
    batch_size=200,
    cols_tri=["nHits", "nClus", "Edep"],
    cols_e=list(map(str, range(0, 30 * 100 * 2, 2))),
    cols_t=list(map(str, range(1, 30 * 100 * 2, 2))),
    label="nPN",
)

# Same settings (shallow copy), but reading only subrun 19 for validation.
validation_config = {**config, "subruns": [19]}

In [6]:
# One output class per configured `neutrons` value plus one extra class, matching
# the `len(neutrons) + 1` one-hot labels produced by DataGeneratorBars.
model = Model(num_classes=len(config["neutrons"]) + 1)

# Each generator fits its scalers and loads its first cache on construction.
generator = DataGeneratorBars(config)
validation_generator = DataGeneratorBars(validation_config)

# Bayesian optimisation of validation accuracy; trial results are stored under
# kerastuner/barstri-ram-V0.
# Alternatives left from earlier experiments:
#   tuner = kerastuner.tuners.RandomSearch(
#   , overwrite=True
tuner = kerastuner.tuners.BayesianOptimization(
    hypermodel=model,
    objective="val_accuracy",
    max_trials=20,
    executions_per_trial=1,
    directory="kerastuner",
    project_name="barstri-ram-V0",
)
tuner.search_space_summary()

Rows in one file: 10000 -> 200 batches per subrun -> 1400 total batches in [range(0, 7)] caches
Loading subruns range(0, 7) for cache 0
Rows in one file: 10000 -> 200 batches per subrun -> 200 total batches in [[19]] caches
Loading subruns [19] for cache 0


In [7]:
# Run the hyperparameter search with 3 epochs per model.
# shuffle=False keeps the batch indices in sequential order, so the generator
# only reloads its cache when the index crosses into the next cache.
tuner.search(
    generator,
    validation_data=validation_generator,
    epochs=3,
    shuffle=False,
    verbose=2,
)

  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 35s - loss: 1.3641 - accuracy: 0.4329 - val_loss: 1.1255 - val_accuracy: 0.6032
Epoch 2/3
1400/1400 - 28s - loss: 0.9381 - accuracy: 0.6465 - val_loss: 0.8353 - val_accuracy: 0.6746
Epoch 3/3
1400/1400 - 54s - loss: 0.8047 - accuracy: 0.6742 - val_loss: 0.7808 - val_accuracy: 0.6805


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 53s - loss: 0.6951 - accuracy: 0.7029 - val_loss: 0.6480 - val_accuracy: 0.7182
Epoch 2/3
1400/1400 - 56s - loss: 0.6349 - accuracy: 0.7284 - val_loss: 0.6331 - val_accuracy: 0.7334
Epoch 3/3
1400/1400 - 72s - loss: 0.6236 - accuracy: 0.7365 - val_loss: 0.6279 - val_accuracy: 0.7320


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 77s - loss: 0.6854 - accuracy: 0.7040 - val_loss: 0.6412 - val_accuracy: 0.7189
Epoch 2/3
1400/1400 - 66s - loss: 0.6300 - accuracy: 0.7337 - val_loss: 0.6297 - val_accuracy: 0.7318
Epoch 3/3
1400/1400 - 78s - loss: 0.6204 - accuracy: 0.7360 - val_loss: 0.6253 - val_accuracy: 0.7317


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 84s - loss: 0.6668 - accuracy: 0.7089 - val_loss: 0.6281 - val_accuracy: 0.7239
Epoch 2/3
1400/1400 - 74s - loss: 0.5986 - accuracy: 0.7400 - val_loss: 0.6371 - val_accuracy: 0.7204
Epoch 3/3
1400/1400 - 87s - loss: 0.4918 - accuracy: 0.7942 - val_loss: 0.7220 - val_accuracy: 0.7103


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 60s - loss: 1.2707 - accuracy: 0.5047 - val_loss: 0.9889 - val_accuracy: 0.6470
Epoch 2/3
1400/1400 - 65s - loss: 0.8759 - accuracy: 0.6632 - val_loss: 0.8130 - val_accuracy: 0.6770
Epoch 3/3
1400/1400 - 58s - loss: 0.7916 - accuracy: 0.6774 - val_loss: 0.7724 - val_accuracy: 0.6836


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 38s - loss: 0.6659 - accuracy: 0.7091 - val_loss: 0.6280 - val_accuracy: 0.7225
Epoch 2/3
1400/1400 - 34s - loss: 0.6005 - accuracy: 0.7388 - val_loss: 0.6340 - val_accuracy: 0.7207
Epoch 3/3
1400/1400 - 43s - loss: 0.5097 - accuracy: 0.7849 - val_loss: 0.6982 - val_accuracy: 0.7145


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 107s - loss: 0.6630 - accuracy: 0.7101 - val_loss: 0.6285 - val_accuracy: 0.7214
Epoch 2/3
1400/1400 - 80s - loss: 0.5942 - accuracy: 0.7422 - val_loss: 0.6373 - val_accuracy: 0.7214
Epoch 3/3
1400/1400 - 81s - loss: 0.4766 - accuracy: 0.8003 - val_loss: 0.7498 - val_accuracy: 0.7089


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 65s - loss: 0.6630 - accuracy: 0.7102 - val_loss: 0.6282 - val_accuracy: 0.7224
Epoch 2/3
1400/1400 - 46s - loss: 0.5955 - accuracy: 0.7419 - val_loss: 0.6359 - val_accuracy: 0.7224
Epoch 3/3
1400/1400 - 44s - loss: 0.4851 - accuracy: 0.7959 - val_loss: 0.7374 - val_accuracy: 0.7121


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 79s - loss: 0.6643 - accuracy: 0.7099 - val_loss: 0.6285 - val_accuracy: 0.7220
Epoch 2/3
1400/1400 - 63s - loss: 0.5978 - accuracy: 0.7402 - val_loss: 0.6360 - val_accuracy: 0.7214
Epoch 3/3
1400/1400 - 90s - loss: 0.4901 - accuracy: 0.7929 - val_loss: 0.7299 - val_accuracy: 0.7125


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 124s - loss: 1.5793 - accuracy: 0.2549 - val_loss: 1.5494 - val_accuracy: 0.2580
Epoch 2/3
1400/1400 - 105s - loss: 1.5177 - accuracy: 0.2638 - val_loss: 1.4862 - val_accuracy: 0.2688
Epoch 3/3
1400/1400 - 107s - loss: 1.4566 - accuracy: 0.2767 - val_loss: 1.4284 - val_accuracy: 0.2861


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 48s - loss: 0.6671 - accuracy: 0.7087 - val_loss: 0.6278 - val_accuracy: 0.7239
Epoch 2/3
1400/1400 - 31s - loss: 0.6076 - accuracy: 0.7353 - val_loss: 0.6287 - val_accuracy: 0.7237
Epoch 3/3
1400/1400 - 31s - loss: 0.5510 - accuracy: 0.7627 - val_loss: 0.6624 - val_accuracy: 0.7182


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 32s - loss: 1.5978 - accuracy: 0.2433 - val_loss: 1.5843 - val_accuracy: 0.2584
Epoch 2/3
1400/1400 - 25s - loss: 1.5687 - accuracy: 0.2553 - val_loss: 1.5520 - val_accuracy: 0.2529
Epoch 3/3
1400/1400 - 29s - loss: 1.5337 - accuracy: 0.2523 - val_loss: 1.5147 - val_accuracy: 0.2517


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 32s - loss: 1.5981 - accuracy: 0.2839 - val_loss: 1.5835 - val_accuracy: 0.3799
Epoch 2/3
1400/1400 - 30s - loss: 1.5666 - accuracy: 0.3875 - val_loss: 1.5490 - val_accuracy: 0.3677
Epoch 3/3
1400/1400 - 39s - loss: 1.5304 - accuracy: 0.3315 - val_loss: 1.5114 - val_accuracy: 0.3020


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 67s - loss: 0.6640 - accuracy: 0.7098 - val_loss: 0.6273 - val_accuracy: 0.7232
Epoch 2/3
1400/1400 - 50s - loss: 0.5964 - accuracy: 0.7410 - val_loss: 0.6371 - val_accuracy: 0.7218
Epoch 3/3
1400/1400 - 57s - loss: 0.4910 - accuracy: 0.7936 - val_loss: 0.7254 - val_accuracy: 0.7107


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 99s - loss: 0.6635 - accuracy: 0.7098 - val_loss: 0.6275 - val_accuracy: 0.7229
Epoch 2/3
1400/1400 - 86s - loss: 0.5931 - accuracy: 0.7430 - val_loss: 0.6396 - val_accuracy: 0.7219
Epoch 3/3
1400/1400 - 75s - loss: 0.4739 - accuracy: 0.8010 - val_loss: 0.7612 - val_accuracy: 0.7076


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 74s - loss: 1.2888 - accuracy: 0.4853 - val_loss: 1.0087 - val_accuracy: 0.6379
Epoch 2/3
1400/1400 - 71s - loss: 0.8841 - accuracy: 0.6607 - val_loss: 0.8164 - val_accuracy: 0.6766
Epoch 3/3
1400/1400 - 69s - loss: 0.7940 - accuracy: 0.6769 - val_loss: 0.7743 - val_accuracy: 0.6828


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 94s - loss: 0.6643 - accuracy: 0.7094 - val_loss: 0.6285 - val_accuracy: 0.7214
Epoch 2/3
1400/1400 - 103s - loss: 0.5945 - accuracy: 0.7422 - val_loss: 0.6361 - val_accuracy: 0.7230
Epoch 3/3
1400/1400 - 100s - loss: 0.4785 - accuracy: 0.7990 - val_loss: 0.7353 - val_accuracy: 0.7095


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 29s - loss: 0.6645 - accuracy: 0.7097 - val_loss: 0.6293 - val_accuracy: 0.7222
Epoch 2/3
1400/1400 - 46s - loss: 0.6053 - accuracy: 0.7361 - val_loss: 0.6319 - val_accuracy: 0.7236
Epoch 3/3
1400/1400 - 33s - loss: 0.5400 - accuracy: 0.7686 - val_loss: 0.6761 - val_accuracy: 0.7198


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 74s - loss: 0.6856 - accuracy: 0.7040 - val_loss: 0.6415 - val_accuracy: 0.7185
Epoch 2/3
1400/1400 - 76s - loss: 0.6301 - accuracy: 0.7335 - val_loss: 0.6298 - val_accuracy: 0.7319
Epoch 3/3
1400/1400 - 71s - loss: 0.6203 - accuracy: 0.7365 - val_loss: 0.6256 - val_accuracy: 0.7312


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 67s - loss: 0.6661 - accuracy: 0.7087 - val_loss: 0.6285 - val_accuracy: 0.7234
Epoch 2/3
1400/1400 - 46s - loss: 0.6000 - accuracy: 0.7392 - val_loss: 0.6342 - val_accuracy: 0.7219
Epoch 3/3
1400/1400 - 46s - loss: 0.5008 - accuracy: 0.7892 - val_loss: 0.6948 - val_accuracy: 0.7143


INFO:tensorflow:Oracle triggered exit


In [8]:
# Print a summary of the trials recorded by the search above.
tuner.results_summary()