In [2]:
import keras_tuner
import pickle
from sklearn.utils import class_weight
from model_definition import *
from preprocessing import vocab_size

In [3]:
def _read_split(path):
    """Unpickle one preprocessed split and return its two stored values.

    The pickled object is expected to expose ``.values()`` yielding exactly
    two items, which are unpacked in storage order as (features, labels).
    NOTE(review): pickle can execute arbitrary code on load - only use this
    with files produced by the project's own preprocessing step.
    """
    with open(path, 'rb') as handle:
        features, labels = pickle.load(handle).values()
    return features, labels


X_train, y_train = _read_split('./data/train_preprocessed_routes')
# The test split ('./data/test_preprocessed_routes') is deliberately not
# loaded here; only the train and validation splits are used for tuning.
X_val, y_val = _read_split('./data/val_preprocessed_routes')

In [4]:
# Force the first column of every loaded split to the constant 3.
# NOTE(review): presumably 3 is a special token id expected by the model in
# position 0 - TODO confirm against the preprocessing module.
for _split in (X_train, X_val):
    _split[:, 0] = 3
# X_test is not loaded in this notebook, so it is not modified here.

In [5]:
# Variables:

# Constants
# Number of target classes. MyHyperModel.build also sets its own local
# num_classes = 9, so this module-level value is not what the model reads.
num_classes = 9
# Intended number of training epochs per trial.
# NOTE(review): the tuner.search call in this notebook passes epochs=5, not
# this value - confirm which one is intended.
epochs = 30

# Iteration one, 68 trials :

# num_layers   =  2    -  8,   step 2   ->   2
# d_model      =  64   -  512, step 64  ->   64
# dff          =  512  -  2048, step 256 ->  768
# num_heads    =  4    -  10,   step 2   ->  10
# dropout_rate =  0.1  -  0.4,  step 0.1 ->  0.1
# warmup_steps =  2500 -  5500, step 500 ->  3500



# Iteration two, 100 trials:

# num_layers   =  1    -  4,   step 1   ->  3
# d_model      =  16   -  128, step 8  -> 128
# dff          =  512  -  1024, step 32 -> 736
# num_heads    =  8    -  14,   step 1   -> 8
# dropout_rate =  0.04 -  0.24,  step 0.02 -> 0.2
# warmup_steps =  2500 -  4750, step 250 -> 3250
# beta_1       =  0.79 -  0.95, step 0.02 -> 0.79
# beta_2       =  0.95 -  0.99, step 0.005 -> 0.98
# epsilon      = 1e-11 -  1e-7, step NA   -> 6.35e-08

# Iteration three, 99 trials:

# num_layers   =  1    -  4,   step 1   ->  
    # default: 2
# d_model      =  16   -  192, step 16  -> 
    # default: 128
# dff          =  512  -  1280, step 64 ->
    # default: 768
# num_heads    =  8    -  14,   step 1   -> 
    # default: 10
# dropout_rate =  0.125 -  0.30,  step 0.025 -> 
    # default: 0.2
# warmup_steps =  2000 -  7000, step 500 -> 
    # default: 4000
# beta_1       =  0.74 -  0.93, step 0.0025 -> 
    # default: 0.8
# beta_2       =  0.95 -  0.99, step 0.005 -> 
    # default: 0.98
# epsilon      = 1e-9  -  5e-7, sample 'log' ->
    # default: 1e-8
# global_batch =  16   -  128,  step 16  ->
    # default: 64


# Implement (now done in MyHyperModel, where it is named batch_size):
# global_batch =  16   -  128,  step 16  ->

# learning_rate = CustomSchedule(d_model, warmup_steps=warmup_steps)
# optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

In [6]:
class MyHyperModel(keras_tuner.HyperModel):
    """Keras Tuner hypermodel for the encoder classifier.

    ``build`` declares the architecture and optimizer search space and returns
    a compiled ``EncoderClassifier``. ``fit`` tunes the batch size and trains
    the model on batched ``tf.data`` pipelines built from the arrays that the
    tuner forwards from ``tuner.search``.
    """

    def build(self, hp):
        """Declare the search space and return a compiled model.

        Args:
            hp: ``keras_tuner.HyperParameters`` used to sample every value.

        Returns:
            An ``EncoderClassifier`` compiled with sparse categorical
            cross-entropy, an Adam optimizer driven by ``CustomSchedule``,
            and the ``accuracy`` metric.
        """
        # Architecture hyperparameters.
        num_layers = hp.Int("num_layers", min_value=1, max_value=4, default=2, step=1)
        d_model = hp.Int("d_model", min_value=16, max_value=192, default=128, step=16)
        dff = hp.Int("dff", min_value=512, max_value=1280, default=768, step=64)
        num_heads = hp.Int("num_heads", min_value=8, max_value=14, default=10, step=1)
        dropout_rate = hp.Float("dropout_rate", min_value=0.125, max_value=0.3, default=0.2, step=0.025)

        # Learning-rate schedule and optimizer hyperparameters. Each one is
        # declared exactly once and the sampled value is reused below.
        warmup_steps = hp.Int("warmup_steps", min_value=2000, max_value=7000, default=4000, step=500)
        # batch_size is consumed in fit(); it is registered here as well so it
        # is part of the search space as soon as the model is first built.
        # The declaration (including default=64) must match the one in fit().
        hp.Int("batch_size", min_value=16, max_value=128, step=16, default=64)
        beta_1 = hp.Float("beta_1", min_value=0.74, max_value=0.93, default=.8, step=0.0025)
        beta_2 = hp.Float("beta_2", min_value=0.95, max_value=0.99, default=.98, step=0.005)
        epsilon = hp.Float("epsilon", min_value=1e-9, max_value=5e-7, default=1e-8, sampling="log")

        num_classes = 9

        model = EncoderClassifier(
            num_layers=num_layers,
            d_model=d_model,
            num_heads=num_heads,
            dff=dff,
            vocab_size=vocab_size,
            num_classes=num_classes,
            dropout_rate=dropout_rate,
            activation=hp.Choice("activation", values=['relu', 'swish'], default='relu'),
            sequential=hp.Boolean("sequential"),
        )

        learning_rate = CustomSchedule(d_model, warmup_steps=warmup_steps)
        optimizer = tf.keras.optimizers.Adam(
            learning_rate,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon,
        )

        model.compile(
            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
            optimizer=optimizer,
            metrics=['accuracy'])

        return model

    def fit(self, hp, model, x, y, epochs, validation_data, verbose=1, **kwargs):
        """Train ``model`` for one trial with a tuned batch size.

        Args:
            hp: ``keras_tuner.HyperParameters`` for the current trial.
            model: The compiled model returned by ``build``.
            x: Training inputs, sliceable along the first axis.
            y: Training labels aligned with ``x``.
            epochs: Number of epochs to train.
            validation_data: ``(inputs, labels)`` tuple used for validation,
                or ``None`` to train without validation.
            verbose: Verbosity forwarded to ``model.fit``.
            **kwargs: Remaining ``model.fit`` arguments forwarded by the tuner,
                e.g. ``callbacks`` (including the tuner's own callbacks) and
                ``class_weight``. Class weighting is applied only when the
                caller passes ``class_weight`` to ``tuner.search``.

        Returns:
            The ``History`` object produced by ``model.fit``.
        """
        batch_size = hp.Int("batch_size", 16, 128, step=16, default=64)

        # The data is batched through tf.data below; model.fit rejects an
        # explicit batch_size when given a Dataset, so drop any forwarded one.
        kwargs.pop("batch_size", None)

        # Build the pipelines from the arguments supplied by the tuner rather
        # than from notebook globals, so the method trains on what it is given.
        # NOTE(review): the training set is not shuffled between epochs, as in
        # the previous implementation - confirm this is intended.
        train_ds = tf.data.Dataset.from_tensor_slices((x, y)).batch(batch_size)
        val_ds = None
        if validation_data is not None:
            val_ds = tf.data.Dataset.from_tensor_slices(validation_data).batch(batch_size)

        # Forwarding **kwargs is required for the tuner's checkpointing and
        # reporting callbacks, and for user callbacks such as EarlyStopping.
        return model.fit(
            train_ds,
            epochs=epochs,
            validation_data=val_ds,
            verbose=verbose,
            **kwargs,
        )

In [7]:
# Sub-directory under "tuners/" that stores this search's state.
tuner_name = "tuner3"

# overwrite=False makes Keras Tuner reload any existing state found in
# tuners/<tuner_name> instead of starting the search from scratch.
tuner = keras_tuner.BayesianOptimization(
    hypermodel=MyHyperModel(),
    #hyperparameters=hp,
    #tune_new_entries=True,
    objective="val_accuracy",
    max_trials=10,
    overwrite=False,
    directory="tuners",
    project_name=tuner_name,
)

# "balanced" per-class weights computed from the training labels, stored as a
# {label: weight} mapping for use as the class_weight training argument.
_labels = np.unique(y_train)
_balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=_labels,
    y=y_train,
)
class_weights = dict(zip(_labels, _balanced_weights))

# Early-stopping callback monitoring validation accuracy.
my_callbacks = tf.keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    min_delta=0.05,
    patience=3,
    verbose=2,
    baseline=0.40,
    start_from_epoch=1,
)

INFO:tensorflow:Reloading Tuner from tuners/tuner3/tuner0.json


In [8]:
# Run the hyperparameter search; every keyword below is forwarded by the
# tuner to MyHyperModel.fit for each trial.
search_args = dict(
    x=X_train,
    y=y_train,
    epochs=5,
    validation_data=(X_val, y_val),
    class_weight=class_weights,
    callbacks=[my_callbacks],
)
tuner.search(**search_args)
#best_model = tuner.get_best_models()[5]


Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
2                 |?                 |num_layers
80                |?                 |d_model
832               |?                 |dff
12                |?                 |num_heads
0.175             |?                 |dropout_rate
4500              |?                 |warmup_steps
128               |?                 |batch_size
0.91              |?                 |beta_1
0.985             |?                 |beta_2
2.7038e-09        |?                 |epsilon
relu              |?                 |activation
True              |?                 |sequential

Epoch 1/5


2023-03-17 16:46:39.502036: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-17 16:46:41.265546: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13641 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:60:00.0, compute capability: 7.5
2023-03-17 16:46:41.268571: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13641 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:61:00.0, compute capability: 7.5
2023-03-17 16:46:48.328922: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8401
2023-03-17 16:46:48.7434

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


FatalTypeError: Expected the return value of HyperModel.fit() to be one of float, dict, keras.callbacks.History, or a list of one of these types. Recevied return value: {<keras.callbacks.History object at 0x7f34a2fdb850>} of type <class 'set'>.