# Keras Tuner Bars

In [1]:
import sys
import time

import numpy as np
import pandas as pd
import sklearn.preprocessing

sys.path.append("..")
from helpers import filename_for

In [2]:
import kerastuner
import tensorflow
from tensorflow import keras

# Record the library versions this notebook was executed with.
print(f"tensorflow {tensorflow.__version__}")
print(f"keras {keras.__version__}")
print(f"keras-tuner {kerastuner.__version__}")

tensorflow 2.1.0
keras 2.2.4-tf
keras-tuner 1.0.1


In [3]:
class DataGeneratorBars(keras.utils.Sequence):
    """Batch generator that feeds bar data from ``bars.parquet`` files to Keras.

    Subruns are grouped into caches of ``subrun_cache_size`` subruns. Only one
    cache is held in memory at a time; requesting a batch that belongs to a
    different cache triggers a reload from disk.

    ``config`` is a dict with the keys

    * ``distance``, ``doubleplane``, ``energy``, ``erel``: passed through to
      ``filename_for`` to locate the input files,
    * ``neutrons``: values passed to ``filename_for``; the labels are one-hot
      encoded into ``len(neutrons) + 1`` classes,
    * ``subruns``: sliceable sequence of subrun ids to serve,
    * ``subrun_cache_size``: number of subruns per cache,
    * ``batch_size``: rows per batch,
    * ``cols_e`` / ``cols_t``: column names of the two feature groups,
    * ``label``: name of the label column,
    * ``seed`` (optional): ``random_state`` for the per-cache shuffle; the
      shuffle is unseeded when the key is absent,
    * ``scaler_subruns`` (optional): subruns used to fit the scalers;
      defaults to ``range(5)``.
    """

    def __init__(self, config):
        """Size the generator from one file, fit the scalers and load cache 0."""
        super().__init__()
        self.c = config

        self.labels = []
        self.features = []

        # NOTE: scaler_tri is created but not used anywhere in this class.
        self.scaler_tri = sklearn.preprocessing.MaxAbsScaler()
        self.scaler_e = sklearn.preprocessing.MaxAbsScaler()
        self.scaler_t = sklearn.preprocessing.MaxAbsScaler()

        # Only the row count of one file is needed, so read just the label
        # column instead of the whole table. Every other file is assumed to
        # contain the same number of rows.
        first_file = self._files_for([self.c["subruns"][0]], [self.c["neutrons"][0]])[0]
        rows = len(pd.read_parquet(first_file, columns=[self.c["label"]]).index)

        self.batches_per_subrun = (rows * len(self.c["neutrons"])) // self.c["batch_size"]
        self.batches_per_cache = self.batches_per_subrun * self.c["subrun_cache_size"]
        self.len = self.batches_per_subrun * len(self.c["subruns"])

        # Consecutive chunks of at most `subrun_cache_size` subruns each.
        self.cache_subruns = [
            self.c["subruns"][i : i + self.c["subrun_cache_size"]]
            for i in range(0, len(self.c["subruns"]), self.c["subrun_cache_size"])
        ]
        self.current_cache = -1

        print(f"Rows in one file: {rows}")
        print(f"{self.batches_per_subrun} batches per subrun")
        print(f"{self.len} total batches in {self.cache_subruns} caches")

        self.fitscalers()
        self.load(0)

    def __len__(self):
        """Return the total number of batches over all subruns."""
        return self.len

    def __getitem__(self, index):
        """Return the ``(features, labels)`` arrays of batch ``index``.

        Loads the cache containing the batch first if it is not the one
        currently in memory.
        """
        cacheid = index // self.batches_per_cache
        i = index % (self.batches_per_cache)

        if cacheid != self.current_cache:
            self.load(cacheid)

        a = i * self.c["batch_size"]
        b = (i + 1) * self.c["batch_size"]

        x = self.features[a:b]
        y = self.labels[a:b]
        return x, y

    def _files_for(self, subruns, neutrons=None):
        """Return the ``bars.parquet`` path of every (neutron, subrun) pair.

        ``neutrons`` defaults to ``config["neutrons"]``. The neutron value is
        the outer loop, the subrun the inner one.
        """
        if neutrons is None:
            neutrons = self.c["neutrons"]
        return [
            filename_for(
                self.c["distance"],
                self.c["doubleplane"],
                self.c["energy"],
                self.c["erel"],
                n,
                "inclxx",
                subrun,
                "bars.parquet",
            )
            for n in neutrons
            for subrun in subruns
        ]

    @staticmethod
    def _scaled(scaler, data, cols):
        """Scale ``data[cols]`` with ``scaler`` and return a ``(rows, len(cols))`` array.

        The values are flattened to a single column before transforming, so one
        common scale factor is applied to all columns of the group.
        """
        return scaler.transform(data[cols].values.reshape(-1, 1)).reshape(-1, len(cols))

    def load(self, cacheid):
        """Read, shuffle and scale all files of cache ``cacheid`` into memory.

        Replaces ``self.features`` and ``self.labels`` and records ``cacheid``
        as the current cache.
        """
        subruns = self.cache_subruns[cacheid]
        print(f"Loading subruns {subruns} for cache {cacheid}")

        frames = [pd.read_parquet(file) for file in self._files_for(subruns)]
        # Shuffle the rows on every load so a batch mixes rows from all files.
        # With no "seed" in the config this stays an unseeded shuffle.
        data = pd.concat(frames, ignore_index=True).sample(frac=1, random_state=self.c.get("seed"))
        # Rows without any hits are assigned label 0.
        data.loc[data["nHits"] == 0, self.c["label"]] = 0

        self.current_cache = cacheid
        # Feature layout: all `cols_e` columns followed by all `cols_t` columns.
        self.features = np.concatenate(
            (
                self._scaled(self.scaler_e, data, self.c["cols_e"]),
                self._scaled(self.scaler_t, data, self.c["cols_t"]),
            ),
            axis=1,
        )
        self.labels = keras.utils.to_categorical(
            data[[self.c["label"]]].values.ravel(), num_classes=len(self.c["neutrons"]) + 1
        )

    def fitscalers(self):
        """Fit ``scaler_e`` and ``scaler_t`` on a fixed set of subruns.

        The subruns default to ``range(5)`` regardless of ``config["subruns"]``
        and can be overridden with the optional ``scaler_subruns`` config key.
        """
        subruns = self.c.get("scaler_subruns", range(5))
        frames = [pd.read_parquet(file) for file in self._files_for(subruns)]
        data = pd.concat(frames, ignore_index=True)
        # Flatten each column group so the scaler learns one factor per group.
        self.scaler_e.fit(data[self.c["cols_e"]].values.reshape(-1, 1))
        self.scaler_t.fit(data[self.c["cols_t"]].values.reshape(-1, 1))

In [4]:
class Model(kerastuner.HyperModel):
    """Hypermodel for a dense softmax classifier with two tunable hidden layers.

    Tuned hyperparameters: the width of both ReLU layers (``relu1nodes``,
    ``relu2nodes``) and the optimizer (``optimizer``).
    """

    def __init__(self, num_classes):
        """Store the number of output classes.

        Args:
            num_classes: Number of units of the final softmax layer.
        """
        # Run the base-class initialiser so HyperModel's own state is set up
        # instead of being silently skipped.
        super().__init__()
        self.num_classes = num_classes

    def build(self, hp):
        """Build and compile a model for the hyperparameter set ``hp``.

        Args:
            hp: Hyperparameter container supplied by the tuner.

        Returns:
            A compiled ``keras.models.Sequential`` model using categorical
            cross-entropy loss and the accuracy metric.
        """
        model = keras.models.Sequential()
        model.add(
            keras.layers.Dense(
                units=hp.Int("relu1nodes", min_value=1000, max_value=10000, step=3000),
                activation="relu",
            )
        )
        model.add(
            keras.layers.Dense(
                units=hp.Int("relu2nodes", min_value=500, max_value=3000, step=200),
                activation="relu",
            )
        )
        model.add(keras.layers.Dense(units=self.num_classes, activation="softmax"))

        optimizer = hp.Choice("optimizer", ["adam", "adagrad", "adadelta", "SGD"])
        loss = keras.losses.CategoricalCrossentropy()

        model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])

        return model

In [5]:
# Settings for the training generator. "cols_e" holds the even-numbered column
# names and "cols_t" the odd-numbered ones (presumably energy / time per bar --
# confirm against the file schema).
config = dict(
    distance=15,
    doubleplane=30,
    energy=600,
    erel=500,
    neutrons=[1, 2, 3, 4],
    subruns=range(7),  # alternative: range(19)
    subrun_cache_size=7,
    batch_size=200,
    cols_e=list(map(str, range(0, 30 * 100 * 2, 2))),
    cols_t=list(map(str, range(1, 30 * 100 * 2, 2))),
    label="nPN",
)

# Validation uses the same settings but reads a single, separate subrun.
validation_config = {**config, "subruns": [19]}

In [6]:
# The generators one-hot encode the labels into len(neutrons) + 1 classes
# (rows without hits are relabelled to 0), so the model needs that many outputs.
model = Model(len(config["neutrons"]) + 1)
# Constructing a generator already reads files: it fits the scalers and loads
# the first cache, printing its progress.
generator = DataGeneratorBars(config)
validation_generator = DataGeneratorBars(validation_config)

# Bayesian optimisation over the search space defined in Model.build, ranked by
# validation accuracy. The commented line below is the random-search alternative.
tuner = kerastuner.tuners.BayesianOptimization(
    # tuner = kerastuner.tuners.RandomSearch(
    model,
    objective="val_accuracy",
    max_trials=20,
    executions_per_trial=1,
    directory="kerastuner",
    project_name="bars-ram-V0",  # optionally add: overwrite=True (presumably discards earlier results -- confirm)
)
tuner.search_space_summary()

Rows in one file: 10000 -> 200 batches per subrun -> 1400 total batches in [range(0, 7)] caches
Loading subruns range(0, 7) for cache 0
Rows in one file: 10000 -> 200 batches per subrun -> 200 total batches in [[19]] caches
Loading subruns [19] for cache 0


In [7]:
# Run the hyperparameter search, training each trial for 3 epochs.
# shuffle=False is presumably chosen because DataGeneratorBars reloads its cache
# from disk whenever a requested batch index falls into a different cache.
tuner.search(generator, verbose=2, epochs=3, shuffle=False, validation_data=validation_generator)

  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 54s - loss: 0.8176 - accuracy: 0.6305 - val_loss: 0.7788 - val_accuracy: 0.6443
Epoch 2/3
1400/1400 - 51s - loss: 0.7677 - accuracy: 0.6494 - val_loss: 0.7686 - val_accuracy: 0.6574
Epoch 3/3
1400/1400 - 46s - loss: 0.7563 - accuracy: 0.6626 - val_loss: 0.7646 - val_accuracy: 0.6566


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 29s - loss: 0.7973 - accuracy: 0.6341 - val_loss: 0.7412 - val_accuracy: 0.6632
Epoch 2/3
1400/1400 - 25s - loss: 0.6849 - accuracy: 0.6932 - val_loss: 0.7614 - val_accuracy: 0.6623
Epoch 3/3
1400/1400 - 25s - loss: 0.5211 - accuracy: 0.7828 - val_loss: 1.0020 - val_accuracy: 0.6392


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 29s - loss: 0.7969 - accuracy: 0.6342 - val_loss: 0.7409 - val_accuracy: 0.6635
Epoch 2/3
1400/1400 - 28s - loss: 0.6840 - accuracy: 0.6933 - val_loss: 0.7587 - val_accuracy: 0.6636
Epoch 3/3
1400/1400 - 25s - loss: 0.5193 - accuracy: 0.7830 - val_loss: 0.9715 - val_accuracy: 0.6446


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 28s - loss: 0.7990 - accuracy: 0.6335 - val_loss: 0.7440 - val_accuracy: 0.6612
Epoch 2/3
1400/1400 - 28s - loss: 0.6875 - accuracy: 0.6919 - val_loss: 0.7582 - val_accuracy: 0.6629
Epoch 3/3
1400/1400 - 24s - loss: 0.5286 - accuracy: 0.7804 - val_loss: 0.9404 - val_accuracy: 0.6484


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 12s - loss: 0.8046 - accuracy: 0.6313 - val_loss: 0.7547 - val_accuracy: 0.6563
Epoch 2/3
1400/1400 - 12s - loss: 0.7131 - accuracy: 0.6779 - val_loss: 0.7556 - val_accuracy: 0.6617
Epoch 3/3
1400/1400 - 10s - loss: 0.6145 - accuracy: 0.7315 - val_loss: 0.8381 - val_accuracy: 0.6522


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 44s - loss: 0.7950 - accuracy: 0.6349 - val_loss: 0.7393 - val_accuracy: 0.6639
Epoch 2/3
1400/1400 - 44s - loss: 0.6750 - accuracy: 0.6979 - val_loss: 0.7656 - val_accuracy: 0.6643
Epoch 3/3
1400/1400 - 38s - loss: 0.4857 - accuracy: 0.8004 - val_loss: 1.0327 - val_accuracy: 0.6452


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 58s - loss: 0.7950 - accuracy: 0.6347 - val_loss: 0.7378 - val_accuracy: 0.6638
Epoch 2/3
1400/1400 - 58s - loss: 0.6714 - accuracy: 0.7011 - val_loss: 0.7648 - val_accuracy: 0.6647
Epoch 3/3
1400/1400 - 50s - loss: 0.4684 - accuracy: 0.8095 - val_loss: 1.0505 - val_accuracy: 0.6469


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 68s - loss: 1.5954 - accuracy: 0.2578 - val_loss: 1.5791 - val_accuracy: 0.2779
Epoch 2/3
1400/1400 - 67s - loss: 1.5605 - accuracy: 0.2904 - val_loss: 1.5417 - val_accuracy: 0.2921
Epoch 3/3
1400/1400 - 68s - loss: 1.5237 - accuracy: 0.2976 - val_loss: 1.5065 - val_accuracy: 0.2989


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 38s - loss: 0.7921 - accuracy: 0.6358 - val_loss: 0.7398 - val_accuracy: 0.6627
Epoch 2/3
1400/1400 - 38s - loss: 0.6701 - accuracy: 0.7012 - val_loss: 0.7732 - val_accuracy: 0.6628
Epoch 3/3
1400/1400 - 33s - loss: 0.4810 - accuracy: 0.8016 - val_loss: 1.0515 - val_accuracy: 0.6476


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 82s - loss: 0.7899 - accuracy: 0.6371 - val_loss: 0.7370 - val_accuracy: 0.6644
Epoch 2/3
1400/1400 - 72s - loss: 0.6570 - accuracy: 0.7076 - val_loss: 0.7751 - val_accuracy: 0.6638
Epoch 3/3
1400/1400 - 71s - loss: 0.4384 - accuracy: 0.8213 - val_loss: 1.1745 - val_accuracy: 0.6428


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 12s - loss: 1.4670 - accuracy: 0.3194 - val_loss: 1.4322 - val_accuracy: 0.2517
Epoch 2/3
1400/1400 - 11s - loss: 1.4277 - accuracy: 0.3526 - val_loss: 1.4237 - val_accuracy: 0.2887
Epoch 3/3
1400/1400 - 12s - loss: 1.4194 - accuracy: 0.3926 - val_loss: 1.4136 - val_accuracy: 0.4249


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 71s - loss: 0.7922 - accuracy: 0.6359 - val_loss: 0.7355 - val_accuracy: 0.6666
Epoch 2/3
1400/1400 - 61s - loss: 0.6617 - accuracy: 0.7056 - val_loss: 0.7686 - val_accuracy: 0.6656
Epoch 3/3
1400/1400 - 61s - loss: 0.4491 - accuracy: 0.8172 - val_loss: 1.1287 - val_accuracy: 0.6439


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 53s - loss: 0.7916 - accuracy: 0.6364 - val_loss: 0.7369 - val_accuracy: 0.6649
Epoch 2/3
1400/1400 - 53s - loss: 0.6636 - accuracy: 0.7042 - val_loss: 0.7712 - val_accuracy: 0.6650
Epoch 3/3
1400/1400 - 46s - loss: 0.4601 - accuracy: 0.8121 - val_loss: 1.1004 - val_accuracy: 0.6397


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 60s - loss: 1.4552 - accuracy: 0.3520 - val_loss: 1.4195 - val_accuracy: 0.3732
Epoch 2/3
1400/1400 - 60s - loss: 1.4086 - accuracy: 0.4626 - val_loss: 1.3960 - val_accuracy: 0.5265
Epoch 3/3
1400/1400 - 60s - loss: 1.3762 - accuracy: 0.5276 - val_loss: 1.3505 - val_accuracy: 0.5300


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 10s - loss: 1.4588 - accuracy: 0.2941 - val_loss: 1.4295 - val_accuracy: 0.2560
Epoch 2/3
1400/1400 - 9s - loss: 1.4254 - accuracy: 0.3558 - val_loss: 1.4210 - val_accuracy: 0.3233
Epoch 3/3
1400/1400 - 9s - loss: 1.4148 - accuracy: 0.4074 - val_loss: 1.4065 - val_accuracy: 0.4816


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 14s - loss: 0.8025 - accuracy: 0.6315 - val_loss: 0.7531 - val_accuracy: 0.6579
Epoch 2/3
1400/1400 - 13s - loss: 0.7061 - accuracy: 0.6809 - val_loss: 0.7595 - val_accuracy: 0.6600
Epoch 3/3
1400/1400 - 12s - loss: 0.6006 - accuracy: 0.7390 - val_loss: 0.8574 - val_accuracy: 0.6485


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 11s - loss: 0.8055 - accuracy: 0.6316 - val_loss: 0.7529 - val_accuracy: 0.6576
Epoch 2/3
1400/1400 - 9s - loss: 0.7156 - accuracy: 0.6757 - val_loss: 0.7567 - val_accuracy: 0.6568
Epoch 3/3
1400/1400 - 9s - loss: 0.6292 - accuracy: 0.7243 - val_loss: 0.8115 - val_accuracy: 0.6561


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 62s - loss: 0.7927 - accuracy: 0.6363 - val_loss: 0.7374 - val_accuracy: 0.6629
Epoch 2/3
1400/1400 - 62s - loss: 0.6671 - accuracy: 0.7027 - val_loss: 0.7661 - val_accuracy: 0.6646
Epoch 3/3
1400/1400 - 53s - loss: 0.4626 - accuracy: 0.8106 - val_loss: 1.1006 - val_accuracy: 0.6446


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 58s - loss: 0.7943 - accuracy: 0.6353 - val_loss: 0.7378 - val_accuracy: 0.6637
Epoch 2/3
1400/1400 - 50s - loss: 0.6684 - accuracy: 0.7020 - val_loss: 0.7695 - val_accuracy: 0.6613
Epoch 3/3
1400/1400 - 50s - loss: 0.4588 - accuracy: 0.8141 - val_loss: 1.0746 - val_accuracy: 0.6400


  ...
    to  
  ['...']
Train for 1400 steps, validate for 200 steps
Epoch 1/3
1400/1400 - 58s - loss: 0.7947 - accuracy: 0.6352 - val_loss: 0.7377 - val_accuracy: 0.6636
Epoch 2/3
1400/1400 - 58s - loss: 0.6713 - accuracy: 0.7006 - val_loss: 0.7693 - val_accuracy: 0.6638
Epoch 3/3
1400/1400 - 50s - loss: 0.4728 - accuracy: 0.8070 - val_loss: 1.0698 - val_accuracy: 0.6409


INFO:tensorflow:Oracle triggered exit


In [8]:
# Print the tuner's summary of the completed search.
tuner.results_summary()