In [1]:
import numpy as np

# Load the pre-split dataset bundle and unpack its six members in stored order.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects,
# which can execute code on load — only use this with a trusted file.
full_data = np.load('./full_data.npy', allow_pickle=True)
(
    X_train_event, X_test_event,
    X_train_obj, X_test_obj,
    y_train, y_test,
) = full_data

In [2]:
import tensorflow as tf
from tensorflow import keras
import keras_tuner as kt

2022-02-14 11:57:52.974343: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
# Restore the previously saved tuned network from disk and print its layer table.
model = keras.models.load_model(
    './RNN_tuned_model',
)
model.summary()

2022-02-03 18:19:07.974675: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-02-03 18:19:07.976193: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-02-03 18:19:08.842565: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:3b:00.0 name: Tesla T4 computeCapability: 7.5
coreClock: 1.59GHz coreCount: 40 deviceMemorySize: 14.56GiB deviceMemoryBandwidth: 298.08GiB/s
2022-02-03 18:19:08.845116: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 1 with properties: 
pciBusID: 0000:5e:00.0 name: Tesla T4 computeCapability: 7.5
coreClock: 1.59GHz coreCount: 40 deviceMemorySize: 14.56GiB deviceMemoryBandwidth: 298.08GiB/s
2022-02-03 18:19:08.847641: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 2 with properties: 
pciBusID: 0000:5f:00.0 name: Tesla T4 computeCapability: 

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 14, 8)]      0                                            
__________________________________________________________________________________________________
lstm_2 (LSTM)                   (None, 14, 80)       28480       input_3[0][0]                    
__________________________________________________________________________________________________
layer_normalization_1 (LayerNor (None, 14, 80)       160         lstm_2[0][0]                     
__________________________________________________________________________________________________
lstm_3 (LSTM)                   (None, 14, 80)       51520       layer_normalization_1[0][0]      
____________________________________________________________________________________________

In [3]:
# Query the visible GPUs once and reuse the result (the original listed them three times).
gpus = tf.config.list_physical_devices('GPU')
physical_devices = gpus  # alias kept because the original cell defined both names
all_devices = len(gpus)
print("Num GPUs Available: ", all_devices)

# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
# Per the TensorFlow docs this has to be configured before the GPUs are initialised.
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Mirror over exactly the GPUs that were detected rather than a hard-coded count of 6,
# so the cell also runs on hosts with a different number of GPUs. When no GPU is found,
# passing None lets MirroredStrategy pick its own default device.
gpu_device_names = [f"/GPU:{GPU_id}" for GPU_id in range(all_devices)]
mirrored_strategy = tf.distribute.MirroredStrategy(devices=gpu_device_names or None)


Num GPUs Available:  6
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1', '/job:localhost/replica:0/task:0/device:GPU:2', '/job:localhost/replica:0/task:0/device:GPU:3', '/job:localhost/replica:0/task:0/device:GPU:4', '/job:localhost/replica:0/task:0/device:GPU:5')


2022-02-14 11:59:40.003883: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-02-14 11:59:40.008350: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-02-14 11:59:40.878225: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:3b:00.0 name: Tesla T4 computeCapability: 7.5
coreClock: 1.59GHz coreCount: 40 deviceMemorySize: 14.56GiB deviceMemoryBandwidth: 298.08GiB/s
2022-02-14 11:59:40.880846: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 1 with properties: 
pciBusID: 0000:5e:00.0 name: Tesla T4 computeCapability: 7.5
coreClock: 1.59GHz coreCount: 40 deviceMemorySize: 14.56GiB deviceMemoryBandwidth: 298.08GiB/s
2022-02-14 11:59:40.883330: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 2 with properties: 
pciBusID: 0000:5f:00.0 name: Tesla T4 computeCapability: 

In [11]:
from keras.layers import Input, LSTM, GRU, Dense, Concatenate, Dropout, LayerNormalization
from keras.models import Model

def build_model(hp):
    """
    Build and compile one candidate two-input network for the Keras Tuner search.

    Architecture:
    - a sequence input shaped like ``X_train_obj.shape[1:]`` goes through a stack of
      recurrent layers (layer-normalised between layers); the last recurrent layer
      returns only its final output
    - that output is concatenated with a second input of width ``X_train_event.shape[1]``
    - the result passes through Dense + Dropout blocks and a single sigmoid unit

    Hyperparameters sampled from ``hp``:
    - "RNN layer kind": "LSTM" or "GRU"
    - "RNN units": 50 to 300 in steps of 50
    - "RNN layers": 1 to 3
    - "MLP units": 200 to 400 in steps of 100
    - "Feedforward layers": 3 to 5
    - "Dropout rate": 0.2 to 0.4 in steps of 0.1

    Fixed choices: selu activation with lecun_normal initialisation, the Nadam
    optimiser, binary cross-entropy loss, and AUC / precision / recall metrics.

    Returns the compiled model, created inside ``mirrored_strategy.scope()``.
    """
    recurrent_layers = {"LSTM": LSTM, "GRU": GRU}

    with mirrored_strategy.scope():
        obj_input = Input(shape=X_train_obj.shape[1:])

        # Recurrent branch: every layer but the last emits the full sequence.
        RNN_layer_type = hp.Choice("RNN layer kind", ["LSTM", "GRU"])
        make_rnn = recurrent_layers[RNN_layer_type]
        RNN_units = hp.Int("RNN units", 50, 300, 50)
        n_rnn_layers = hp.Int("RNN layers", 1, 3, 1)

        sequence = obj_input
        for _ in range(n_rnn_layers - 1):
            sequence = make_rnn(RNN_units, return_sequences=True)(sequence)
            sequence = LayerNormalization()(sequence)
        RNN_out = make_rnn(RNN_units)(sequence)

        # Join the recurrent summary with the second input.
        event_input = Input(shape=X_train_event.shape[1])
        hidden = Concatenate()([RNN_out, event_input])

        # Feed-forward head: identical Dense + Dropout blocks.
        mlp_units = hp.Int("MLP units", 200, 400, 100)
        n_dense_layers = hp.Int("Feedforward layers", 3, 5, 1)
        dropout_rate = hp.Float("Dropout rate", 0.2, 0.4, 0.1)
        for _ in range(n_dense_layers):
            hidden = Dense(mlp_units, activation="selu", kernel_initializer="lecun_normal")(hidden)
            hidden = Dropout(rate=dropout_rate)(hidden)

        out = Dense(1, activation="sigmoid")(hidden)
        model = Model(inputs=[obj_input, event_input], outputs=out)
        model.compile(
            loss="binary_crossentropy",
            optimizer="Nadam",
            metrics=[keras.metrics.AUC(), keras.metrics.Precision(), keras.metrics.Recall()],
        )
        return model


In [12]:
# Bayesian-optimisation tuner over build_model's search space: up to 20 trials,
# ranked by validation loss; overwrite=True discards results of any earlier search.
tuner = kt.BayesianOptimization(
    build_model,
    objective='val_loss',
    max_trials=20,
    overwrite=True,
)

INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Redu

In [8]:
# The dataset is imbalanced, so signal and background training examples are re-weighted.
# The label is read from the last column of y_train.
train_labels = y_train.values[:, -1]
tot = len(y_train)
pos = np.sum(train_labels == 1)
neg = tot - pos
print(f'Total training samples:  {tot}\npositives:  {pos}\nnegatives:  {neg}')

# Each class is weighted by (1 / class count) * (total / 2); the rarer class
# therefore receives the larger weight.
weight_for_0, weight_for_1 = ((1 / count) * (tot / 2.0) for count in (neg, pos))

class_weight = {0: weight_for_0, 1: weight_for_1}
print(f'Postive weight:  {weight_for_1} \nNegative weight:  {weight_for_0}')

Total training samples:  327177
positives:  79444
negatives:  247733
Postive weight:  2.0591674638739237 
Negative weight:  0.660341981084474


In [13]:
# Run the hyperparameter search: up to 20 epochs per trial with the class weights
# applied, stopping a trial once val_loss has not improved for 3 epochs.
# NOTE(review): the test split is used as validation data here, so the selected
# hyperparameters are tuned against it — confirm a separate hold-out exists.
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
tuner.search(
    [X_train_obj, X_train_event],
    y_train.values[:, -1],
    epochs=20,
    validation_data=([X_test_obj, X_test_event], y_test.values[:, -1]),
    class_weight=class_weight,
    callbacks=[early_stopping],
)



Search: Running Trial #1

Hyperparameter    |Value             |Best Value So Far 
RNN layer kind    |GRU               |?                 
RNN units         |200               |?                 
RNN layers        |1                 |?                 
MLP units         |300               |?                 
Feedforward layers|4                 |?                 
Dropout rate      |0.2               |?                 



2022-02-14 12:02:27.281118: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:656] In AUTO-mode, and switching to DATA-based sharding, instead of FILE-based sharding as we cannot find appropriate reader dataset op(s) to shard. Error: Did not find a shardable source, walked to a node which is not a dataset: name: "FlatMapDataset/_9"
op: "FlatMapDataset"
input: "PrefetchDataset/_8"
attr {
  key: "Targuments"
  value {
    list {
    }
  }
}
attr {
  key: "f"
  value {
    func {
      name: "__inference_Dataset_flat_map_slice_batch_indices_21422"
    }
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: -1
        }
      }
    }
  }
}
attr {
  key: "output_types"
  value {
    list {
      type: DT_INT64
    }
  }
}
. Consider either turning off auto-sharding or switching the auto_shard_policy to DATA to shard this dataset. You can do this by creating a new `tf.data.Options()` object then setting `options.experimental_distribute.au

Epoch 1/20
INFO:tensorflow:batch_all_reduce: 13 all-reduces with algorithm = nccl, num_packs = 1
INFO:tensorflow:batch_all_reduce: 13 all-reduces with algorithm = nccl, num_packs = 1
  427/10225 [>.............................] - ETA: 6:34 - loss: 0.6661 - auc: 0.7260 - precision: 0.3853 - recall: 0.6630

KeyboardInterrupt: 

In [8]:
# Load the two weight arrays stored together in weights.npy
# (presumably per-example weights for the train and test splits — TODO confirm).
# NOTE(review): allow_pickle=True can execute code on load — trusted files only.
train_weights, test_weights = np.load(
    "weights.npy",
    allow_pickle=True,
)
def significance(y_true, y_pred, weights=None, scale=140e3):
    """
    Best weighted significance over a scan of classifier thresholds.

    For every threshold ``thr`` in ``np.arange(0, 1, 0.01)`` the examples with
    ``y_pred >= thr`` are selected and

        s = scale * sg / sqrt(scale * bg)

    is evaluated, where ``sg`` is the summed weight of selected examples with
    ``y_true == 1`` and ``bg`` the summed weight of the remaining selected examples.
    The largest ``s`` found is returned.

    Parameters
    ----------
    y_true : array-like
        Labels; entries equal to 1 count as signal, everything else as background.
    y_pred : array-like
        Predicted scores, one per example. Flattened, so a column vector is accepted.
    weights : array-like, optional
        One weight per example. Defaults to the notebook-level ``test_weights``.
    scale : float, optional
        Factor applied to both sums (140e3 in the original draft; presumably an
        integrated luminosity — TODO confirm).

    Returns
    -------
    The maximum significance, or 0 when no threshold selects any background weight.

    Raises
    ------
    ValueError
        If labels, scores and weights do not have the same number of entries.
    """
    import numpy as np  # local import keeps the function usable on its own

    if weights is None:
        weights = test_weights  # notebook global loaded from weights.npy

    labels = np.asarray(y_true).ravel()
    scores = np.asarray(y_pred, dtype=float).ravel()
    per_example = np.asarray(weights, dtype=float).ravel()
    if not (labels.shape == scores.shape == per_example.shape):
        raise ValueError(
            "y_true, y_pred and weights must have the same number of entries: "
            f"{labels.size}, {scores.size}, {per_example.size}"
        )

    is_signal = labels == 1
    sf = 0
    for thr in np.arange(0, 1, 0.01):
        selected = scores >= thr
        sg = per_example[selected & is_signal].sum()
        bg = per_example[selected & ~is_signal].sum()
        if bg <= 0:
            # No background passes this cut: the ratio is undefined, so skip it
            # instead of letting a division by zero produce inf/nan.
            continue
        sf = max(sf, scale * sg / np.sqrt(scale * bg))

    return sf

In [5]:
from keras.layers import Input, LSTM, GRU, Dense, Concatenate, Dropout, LayerNormalization
from keras.models import Model

# Sequence branch: a single LSTM (100 units, input dropout 0.1) followed by layer norm.
obj_input = Input(shape=X_train_obj.shape[1:])
rnn_out = LSTM(100, dropout=0.1)(obj_input)
rnn_norm = LayerNormalization()(rnn_out)

# Second branch: one selu Dense layer applied to the other input before merging.
event_input = Input(shape=X_train_event.shape[1])
event_hidden = Dense(100, activation="selu", kernel_initializer="lecun_normal")(event_input)

# Merge both branches, then Dropout followed by two (Dense 300 -> Dropout 0.2) blocks.
merged = Concatenate()([event_hidden, rnn_norm])
head = Dropout(0.2)(merged)
for _ in range(2):
    head = Dense(300, activation="selu", kernel_initializer="lecun_normal")(head)
    head = Dropout(0.2)(head)

# Single sigmoid output for binary classification.
out = Dense(1, activation="sigmoid")(head)
model = Model(inputs=[obj_input, event_input], outputs=out)
model.compile(
    loss="binary_crossentropy",
    optimizer="Nadam",
    metrics=[keras.metrics.AUC(), keras.metrics.Precision(), keras.metrics.Recall()],
)



In [6]:
# Train for 20 epochs, validating on the test split after each epoch.
# The target is the last column of y_train / y_test, matching how the labels are
# selected everywhere else in this notebook (class-weight computation and tuner.search);
# the original passed the whole y_train / y_test objects instead.
model.fit(
    [X_train_obj, X_train_event],
    y_train.values[:, -1],
    epochs=20,
    validation_data=([X_test_obj, X_test_event], y_test.values[:, -1]),
)

2022-02-02 10:28:52.997369: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2022-02-02 10:28:52.997945: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2200000000 Hz


Epoch 1/20


2022-02-02 10:28:55.590004: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2022-02-02 10:28:55.921191: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20