# Keras Tuner Bars

In [1]:
import numpy as np
import pandas as pd
import sklearn.preprocessing
import time
import sys

sys.path.append("..")
from helpers import filename_for

In [2]:
import tensorflow
from tensorflow import keras
import kerastuner

# Record the library versions this notebook was executed with.
for label, module in (
    ("tensorflow", tensorflow),
    ("keras", keras),
    ("keras-tuner", kerastuner),
):
    print(label, module.__version__)

tensorflow 2.1.0
keras 2.2.4-tf
keras-tuner 1.0.1


In [3]:
class DataGeneratorBars(keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches from per-subrun parquet files.

    Only one subrun is held in memory at a time. A batch index is mapped to
    ``(subrun, offset)``; whenever the requested subrun differs from the one
    currently loaded, the files of that subrun (one per entry of
    ``config["neutrons"]``) are read, concatenated, shuffled and scaled.
    Consuming the batches in index order therefore loads each subrun exactly
    once per pass, while random access reloads on every subrun change.

    Parameters
    ----------
    config : dict
        Required keys:

        * ``"distance"``, ``"doubleplane"``, ``"energy"``, ``"erel"`` --
          forwarded unchanged to ``filename_for``.
        * ``"neutrons"`` -- sequence of values; one file per value and subrun.
        * ``"subruns"`` -- indexable sequence of subrun identifiers to serve.
        * ``"cols_e"``, ``"cols_t"`` -- lists of column names; each group is
          scaled with its own ``MaxAbsScaler``.
        * ``"label"`` -- name of the label column.

        Optional keys:

        * ``"scaler_subruns"`` -- subruns used to fit the scalers
          (default ``range(5)``).
        * ``"seed"`` -- ``random_state`` for the per-subrun shuffle
          (default ``None``, i.e. a different order on every load).
    batch_size : int, optional
        Number of rows per batch (default 1000).

    Notes
    -----
    The number of batches is derived from the row count of the first file
    only; every other file is assumed to contain the same number of rows.
    """

    def __init__(self, config, batch_size=1000):
        self.c = config
        self.batch_size = batch_size

        self.current_subrun = 0
        self.labels = []
        self.features = []

        # scaler_tri is created but not used by load()/fitscalers().
        self.scaler_tri = sklearn.preprocessing.MaxAbsScaler()
        self.scaler_e = sklearn.preprocessing.MaxAbsScaler()
        self.scaler_t = sklearn.preprocessing.MaxAbsScaler()

        # The first file is read only to learn how many rows one file holds.
        first_file = self._filename(self.c["neutrons"][0], self.c["subruns"][0])
        rows = len(pd.read_parquet(first_file).index)

        # Integer division: rows that do not fill a whole batch are never served.
        self.batches_per_subrun = (rows * len(self.c["neutrons"])) // self.batch_size
        self.len = self.batches_per_subrun * len(self.c["subruns"])
        print(f"Rows in one file: {rows} -> {self.batches_per_subrun} batches per subrun -> {self.len} total batches")

        self.fitscalers()
        self.load(self.c["subruns"][0])

    def __len__(self):
        """Return the total number of batches over all configured subruns."""
        return self.len

    def __getitem__(self, index):
        """Return the ``(features, labels)`` pair for batch ``index``.

        Loads the subrun containing ``index`` first if it is not the one
        currently held in memory.
        """
        subrun = self.c["subruns"][index // self.batches_per_subrun]
        offset = index % self.batches_per_subrun

        if subrun != self.current_subrun:
            print(f"Loading subrun {subrun} for index {index}")
            self.load(subrun)

        start = offset * self.batch_size
        stop = start + self.batch_size
        return self.features[start:stop], self.labels[start:stop]

    def _filename(self, neutrons, subrun):
        """Build the parquet path for one ``neutrons`` value and one subrun."""
        return filename_for(
            self.c["distance"],
            self.c["doubleplane"],
            self.c["energy"],
            self.c["erel"],
            neutrons,
            "inclxx",
            subrun,
            "bars.parquet",
        )

    @staticmethod
    def _flat(data, columns):
        """Return the values of ``columns`` as a single ``(n, 1)`` column."""
        return data[columns].values.reshape(-1, 1)

    def _scale(self, scaler, data, columns):
        """Scale ``columns`` with ``scaler`` and restore the 2-D layout.

        The values are flattened to one column before transforming, so a
        single scale factor is shared by every column of the group.
        """
        return scaler.transform(self._flat(data, columns)).reshape(-1, len(columns))

    def load(self, subrun):
        """Read, shuffle and scale all files of ``subrun`` into memory.

        Replaces ``self.features`` / ``self.labels`` and records ``subrun``
        as the currently loaded one.
        """
        frames = [pd.read_parquet(self._filename(n, subrun)) for n in self.c["neutrons"]]
        # Shuffle the rows so that a batch mixes the files of all neutron values.
        data = pd.concat(frames, ignore_index=True).sample(frac=1, random_state=self.c.get("seed"))
        # Rows without any hit get label 0, whatever the file says.
        data.loc[data["nHits"] == 0, self.c["label"]] = 0

        self.current_subrun = subrun
        self.features = np.concatenate(
            (
                self._scale(self.scaler_e, data, self.c["cols_e"]),
                self._scale(self.scaler_t, data, self.c["cols_t"]),
            ),
            axis=1,
        )
        # One class per entry of "neutrons" plus one for label 0.
        # NOTE(review): assumes label values lie in 0..len(neutrons) -- confirm.
        self.labels = keras.utils.to_categorical(
            data[self.c["label"]].values, num_classes=len(self.c["neutrons"]) + 1
        )

    def fitscalers(self):
        """Fit the energy and time scalers.

        The scalers are fitted on the subruns listed in
        ``config["scaler_subruns"]`` (default ``range(5)``), independently of
        ``config["subruns"]``; two generators that differ only in
        ``"subruns"`` therefore end up with the same scaling.
        """
        subruns = self.c.get("scaler_subruns", range(5))
        files = [self._filename(n, subrun) for n in self.c["neutrons"] for subrun in subruns]
        data = pd.concat([pd.read_parquet(path) for path in files], ignore_index=True)
        self.scaler_e.fit(self._flat(data, self.c["cols_e"]))
        self.scaler_t.fit(self._flat(data, self.c["cols_t"]))

In [4]:
class Model(kerastuner.HyperModel):
    """Hypermodel for a dense classifier with two tunable hidden layers.

    Parameters
    ----------
    num_classes : int
        Number of units of the softmax output layer.
    """

    def __init__(self, num_classes):
        # Run the base-class initialiser so the attributes it sets up exist.
        super().__init__()
        self.num_classes = num_classes

    def build(self, hp):
        """Build and compile one candidate model.

        Tuned hyperparameters: ``relu1nodes`` and ``relu2nodes`` (widths of
        the two hidden ReLU layers) and ``optimizer``.

        Parameters
        ----------
        hp
            Hyperparameter container supplied by the tuner; ``hp.Int`` and
            ``hp.Choice`` are called on it.

        Returns
        -------
        A compiled ``keras.models.Sequential`` model using categorical
        cross-entropy loss and reporting accuracy.
        """
        # NOTE(review): 500 + k * 200 never equals 3000, so the largest
        # reachable width of the second layer is presumably 2900 -- confirm.
        hidden_1 = hp.Int("relu1nodes", min_value=1000, max_value=10000, step=3000)
        hidden_2 = hp.Int("relu2nodes", min_value=500, max_value=3000, step=200)

        model = keras.models.Sequential(
            [
                keras.layers.Dense(units=hidden_1, activation="relu"),
                keras.layers.Dense(units=hidden_2, activation="relu"),
                keras.layers.Dense(units=self.num_classes, activation="softmax"),
            ]
        )

        model.compile(
            loss=keras.losses.CategoricalCrossentropy(),
            optimizer=hp.Choice("optimizer", ["adam", "adagrad", "adadelta", "SGD"]),
            metrics=["accuracy"],
        )
        return model

In [5]:
# Settings consumed by DataGeneratorBars. The feature columns are named by
# number: even names ("0", "2", ...) form "cols_e", odd names ("1", "3", ...)
# form "cols_t".
# NOTE(review): presumably energy and time per bar, 30 * 100 bars -- confirm.
config = dict(
    distance=15,
    doubleplane=30,
    energy=600,
    erel=500,
    neutrons=[1, 2, 3, 4],
    subruns=range(19),
    cols_e=list(map(str, range(0, 30 * 100 * 2, 2))),
    cols_t=list(map(str, range(1, 30 * 100 * 2, 2))),
    label="nPN",
)

# Same settings, but serving only subrun 19 (not part of the training subruns).
validation_config = {**config, "subruns": [19]}

In [6]:
# One output class per entry of "neutrons" plus one for label 0.
model = Model(num_classes=len(config["neutrons"]) + 1)

# Training and validation data are both served in batches of 200 rows.
generator = DataGeneratorBars(config, batch_size=200)
validation_generator = DataGeneratorBars(validation_config, batch_size=200)

# kerastuner.tuners.RandomSearch was previously kept here as a commented-out
# alternative to BayesianOptimization.
tuner = kerastuner.tuners.BayesianOptimization(
    model,
    objective="val_accuracy",
    max_trials=20,
    executions_per_trial=1,
    directory="kerastuner",
    project_name="bars-ram-V0",
    overwrite=True,
)
tuner.search_space_summary()

Rows in one file: 10000 -> 200 batches per subrun -> 3800 total batches
Rows in one file: 10000 -> 200 batches per subrun -> 200 total batches


In [7]:
# shuffle=False keeps the batches in index order, so DataGeneratorBars only
# reloads its data when the index crosses into the next subrun.
tuner.search(
    generator,
    validation_data=validation_generator,
    epochs=3,
    shuffle=False,
    verbose=2,
)

  ...
    to  
  ['...']
Train for 3800 steps, validate for 200 steps
Epoch 1/3
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for index 1200
Loading subrun 7 for index 1400
Loading subrun 8 for index 1600
Loading subrun 9 for index 1800
Loading subrun 10 for index 2000
Loading subrun 11 for index 2200
Loading subrun 12 for index 2400
Loading subrun 13 for index 2600
Loading subrun 14 for index 2800
Loading subrun 15 for index 3000
Loading subrun 16 for index 3200
Loading subrun 17 for index 3400
Loading subrun 18 for index 3600
3800/3800 - 526s - loss: 0.7466 - accuracy: 0.6597 - val_loss: 0.7012 - val_accuracy: 0.6845
Epoch 2/3
Loading subrun 0 for index 0
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for index 1200
Loading subrun 7 fo

Loading subrun 0 for index 0
  ...
    to  
  ['...']
Train for 3800 steps, validate for 200 steps
Epoch 1/3
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for index 1200
Loading subrun 7 for index 1400
Loading subrun 8 for index 1600
Loading subrun 9 for index 1800
Loading subrun 10 for index 2000
Loading subrun 11 for index 2200
Loading subrun 12 for index 2400
Loading subrun 13 for index 2600
Loading subrun 14 for index 2800
Loading subrun 15 for index 3000
Loading subrun 16 for index 3200
Loading subrun 17 for index 3400
Loading subrun 18 for index 3600
3800/3800 - 521s - loss: 1.5644 - accuracy: 0.3318 - val_loss: 1.5157 - val_accuracy: 0.3819
Epoch 2/3
Loading subrun 0 for index 0
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for i

Loading subrun 0 for index 0
  ...
    to  
  ['...']
Train for 3800 steps, validate for 200 steps
Epoch 1/3
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for index 1200
Loading subrun 7 for index 1400
Loading subrun 8 for index 1600
Loading subrun 9 for index 1800
Loading subrun 10 for index 2000
Loading subrun 11 for index 2200
Loading subrun 12 for index 2400
Loading subrun 13 for index 2600
Loading subrun 14 for index 2800
Loading subrun 15 for index 3000
Loading subrun 16 for index 3200
Loading subrun 17 for index 3400
Loading subrun 18 for index 3600
3800/3800 - 522s - loss: 0.7468 - accuracy: 0.6597 - val_loss: 0.7035 - val_accuracy: 0.6821
Epoch 2/3
Loading subrun 0 for index 0
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for i

Loading subrun 0 for index 0
  ...
    to  
  ['...']
Train for 3800 steps, validate for 200 steps
Epoch 1/3
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for index 1200
Loading subrun 7 for index 1400
Loading subrun 8 for index 1600
Loading subrun 9 for index 1800
Loading subrun 10 for index 2000
Loading subrun 11 for index 2200
Loading subrun 12 for index 2400
Loading subrun 13 for index 2600
Loading subrun 14 for index 2800
Loading subrun 15 for index 3000
Loading subrun 16 for index 3200
Loading subrun 17 for index 3400
Loading subrun 18 for index 3600
3800/3800 - 449s - loss: 0.8070 - accuracy: 0.6372 - val_loss: 0.7751 - val_accuracy: 0.6458
Epoch 2/3
Loading subrun 0 for index 0
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for i

Loading subrun 0 for index 0
  ...
    to  
  ['...']
Train for 3800 steps, validate for 200 steps
Epoch 1/3
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for index 1200
Loading subrun 7 for index 1400
Loading subrun 8 for index 1600
Loading subrun 9 for index 1800
Loading subrun 10 for index 2000
Loading subrun 11 for index 2200
Loading subrun 12 for index 2400
Loading subrun 13 for index 2600
Loading subrun 14 for index 2800
Loading subrun 15 for index 3000
Loading subrun 16 for index 3200
Loading subrun 17 for index 3400
Loading subrun 18 for index 3600
3800/3800 - 517s - loss: 0.7489 - accuracy: 0.6584 - val_loss: 0.7097 - val_accuracy: 0.6700
Epoch 2/3
Loading subrun 0 for index 0
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for i

Loading subrun 0 for index 0
  ...
    to  
  ['...']
Train for 3800 steps, validate for 200 steps
Epoch 1/3
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for index 1200
Loading subrun 7 for index 1400
Loading subrun 8 for index 1600
Loading subrun 9 for index 1800
Loading subrun 10 for index 2000
Loading subrun 11 for index 2200
Loading subrun 12 for index 2400
Loading subrun 13 for index 2600
Loading subrun 14 for index 2800
Loading subrun 15 for index 3000
Loading subrun 16 for index 3200
Loading subrun 17 for index 3400
Loading subrun 18 for index 3600
3800/3800 - 702s - loss: 0.7637 - accuracy: 0.6518 - val_loss: 0.7165 - val_accuracy: 0.6739
Epoch 2/3
Loading subrun 0 for index 0
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for i

Loading subrun 0 for index 0
  ...
    to  
  ['...']
Train for 3800 steps, validate for 200 steps
Epoch 1/3
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for index 1200
Loading subrun 7 for index 1400
Loading subrun 8 for index 1600
Loading subrun 9 for index 1800
Loading subrun 10 for index 2000
Loading subrun 11 for index 2200
Loading subrun 12 for index 2400
Loading subrun 13 for index 2600
Loading subrun 14 for index 2800
Loading subrun 15 for index 3000
Loading subrun 16 for index 3200
Loading subrun 17 for index 3400
Loading subrun 18 for index 3600
3800/3800 - 402s - loss: 1.4366 - accuracy: 0.3561 - val_loss: 1.4112 - val_accuracy: 0.4457
Epoch 2/3
Loading subrun 0 for index 0
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for i

Loading subrun 0 for index 0
  ...
    to  
  ['...']
Train for 3800 steps, validate for 200 steps
Epoch 1/3
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for index 1200
Loading subrun 7 for index 1400
Loading subrun 8 for index 1600
Loading subrun 9 for index 1800
Loading subrun 10 for index 2000
Loading subrun 11 for index 2200
Loading subrun 12 for index 2400
Loading subrun 13 for index 2600
Loading subrun 14 for index 2800
Loading subrun 15 for index 3000
Loading subrun 16 for index 3200
Loading subrun 17 for index 3400
Loading subrun 18 for index 3600
3800/3800 - 497s - loss: 0.7504 - accuracy: 0.6576 - val_loss: 0.7094 - val_accuracy: 0.6778
Epoch 2/3
Loading subrun 0 for index 0
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for i

Loading subrun 0 for index 0
  ...
    to  
  ['...']
Train for 3800 steps, validate for 200 steps
Epoch 1/3
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for index 1200
Loading subrun 7 for index 1400
Loading subrun 8 for index 1600
Loading subrun 9 for index 1800
Loading subrun 10 for index 2000
Loading subrun 11 for index 2200
Loading subrun 12 for index 2400
Loading subrun 13 for index 2600
Loading subrun 14 for index 2800
Loading subrun 15 for index 3000
Loading subrun 16 for index 3200
Loading subrun 17 for index 3400
Loading subrun 18 for index 3600
3800/3800 - 570s - loss: 0.7526 - accuracy: 0.6568 - val_loss: 0.7083 - val_accuracy: 0.6846
Epoch 2/3
Loading subrun 0 for index 0
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for i

Loading subrun 0 for index 0
  ...
    to  
  ['...']
Train for 3800 steps, validate for 200 steps
Epoch 1/3
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for index 1200
Loading subrun 7 for index 1400
Loading subrun 8 for index 1600
Loading subrun 9 for index 1800
Loading subrun 10 for index 2000
Loading subrun 11 for index 2200
Loading subrun 12 for index 2400
Loading subrun 13 for index 2600
Loading subrun 14 for index 2800
Loading subrun 15 for index 3000
Loading subrun 16 for index 3200
Loading subrun 17 for index 3400
Loading subrun 18 for index 3600
3800/3800 - 447s - loss: 0.7592 - accuracy: 0.6534 - val_loss: 0.7094 - val_accuracy: 0.6801
Epoch 2/3
Loading subrun 0 for index 0
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for i

Loading subrun 0 for index 0
  ...
    to  
  ['...']
Train for 3800 steps, validate for 200 steps
Epoch 1/3
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for index 1200
Loading subrun 7 for index 1400
Loading subrun 8 for index 1600
Loading subrun 9 for index 1800
Loading subrun 10 for index 2000
Loading subrun 11 for index 2200
Loading subrun 12 for index 2400
Loading subrun 13 for index 2600
Loading subrun 14 for index 2800
Loading subrun 15 for index 3000
Loading subrun 16 for index 3200
Loading subrun 17 for index 3400
Loading subrun 18 for index 3600
3800/3800 - 574s - loss: 1.4197 - accuracy: 0.4194 - val_loss: 1.3669 - val_accuracy: 0.5229
Epoch 2/3
Loading subrun 0 for index 0
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for i

Loading subrun 0 for index 0
  ...
    to  
  ['...']
Train for 3800 steps, validate for 200 steps
Epoch 1/3
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for index 1200
Loading subrun 7 for index 1400
Loading subrun 8 for index 1600
Loading subrun 9 for index 1800
Loading subrun 10 for index 2000
Loading subrun 11 for index 2200
Loading subrun 12 for index 2400
Loading subrun 13 for index 2600
Loading subrun 14 for index 2800
Loading subrun 15 for index 3000
Loading subrun 16 for index 3200
Loading subrun 17 for index 3400
Loading subrun 18 for index 3600
3800/3800 - 441s - loss: 0.7621 - accuracy: 0.6519 - val_loss: 0.7118 - val_accuracy: 0.6786
Epoch 2/3
Loading subrun 0 for index 0
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for i

Loading subrun 0 for index 0
  ...
    to  
  ['...']
Train for 3800 steps, validate for 200 steps
Epoch 1/3
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for index 1200
Loading subrun 7 for index 1400
Loading subrun 8 for index 1600
Loading subrun 9 for index 1800
Loading subrun 10 for index 2000
Loading subrun 11 for index 2200
Loading subrun 12 for index 2400
Loading subrun 13 for index 2600
Loading subrun 14 for index 2800
Loading subrun 15 for index 3000
Loading subrun 16 for index 3200
Loading subrun 17 for index 3400
Loading subrun 18 for index 3600
3800/3800 - 722s - loss: 0.7467 - accuracy: 0.6597 - val_loss: 0.7083 - val_accuracy: 0.6802
Epoch 2/3
Loading subrun 0 for index 0
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for i

Loading subrun 0 for index 0
  ...
    to  
  ['...']
Train for 3800 steps, validate for 200 steps
Epoch 1/3
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for index 1200
Loading subrun 7 for index 1400
Loading subrun 8 for index 1600
Loading subrun 9 for index 1800
Loading subrun 10 for index 2000
Loading subrun 11 for index 2200
Loading subrun 12 for index 2400
Loading subrun 13 for index 2600
Loading subrun 14 for index 2800
Loading subrun 15 for index 3000
Loading subrun 16 for index 3200
Loading subrun 17 for index 3400
Loading subrun 18 for index 3600
3800/3800 - 607s - loss: 0.7491 - accuracy: 0.6585 - val_loss: 0.7048 - val_accuracy: 0.6816
Epoch 2/3
Loading subrun 0 for index 0
Loading subrun 1 for index 200
Loading subrun 2 for index 400
Loading subrun 3 for index 600
Loading subrun 4 for index 800
Loading subrun 5 for index 1000
Loading subrun 6 for i

Loading subrun 0 for index 0


KeyboardInterrupt: 

In [None]:
# Summarise the trials recorded by the tuner. The search cell above ended with
# a KeyboardInterrupt, so this only covers trials finished before that point.
tuner.results_summary()