In [1]:
import numpy as np
from tensorflow import keras

# Load the four pre-split arrays stored together in a single .npy file.
# NOTE(review): allow_pickle=True will unpickle arbitrary objects from the
# file, so only load this from a trusted source.
(X_train_scaled, X_test_scaled, y_train, y_test) = np.load(
    'SL_Higgs_input_data.npy',
    allow_pickle=True,
)


In [2]:
from tensorflow import keras

# Baseline classifier: flatten the (11, 1) input, pass it through four
# identical 100-unit SELU hidden layers, and finish with one sigmoid unit.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=(11, 1)),
    *(
        keras.layers.Dense(
            100,
            activation='selu',
            kernel_initializer='lecun_normal',
        )
        for _ in range(4)
    ),
    keras.layers.Dense(1, activation='sigmoid'),
])
model.summary()

In [3]:
# class Model:

#     def __init__(self, data_path='SL_Higgs_input_data.npy', model):

#         data = np.load(data_path)
#         self.X_train, self.X_test, self.y_train, self.y_test = data
#         self.model = model
#         tot = len(self.y_train)
#         pos = np.sum(self.y_train == 1)
#         neg = tot - pos
#         weight_for_0 = (1 / neg) * (tot / 2.0)
#         weight_for_1 = (1 / pos) * (tot / 2.0)
#         self.weights = {0: weight_for_0, 1: weight_for_1}
        
    
#     def fit(self, epochs=50):
#         self.history = self.model.fit()

        




In [2]:
# The dataset is imbalanced, so the signal and background training examples
# are reweighted below.
tot = len(y_train)
pos = np.sum(y_train == 1)
neg = tot - pos
print(f'Total training samples:  {tot} \npositives:  {pos} \nnegatives:  {neg}')

# Each class gets weight (1 / class_count) * (tot / 2.0); the weighted
# counts of the two classes therefore both come out to about tot / 2.
class_weight = {
    label: (1 / count) * (tot / 2.0)
    for label, count in ((0, neg), (1, pos))
}
weight_for_0 = class_weight[0]
weight_for_1 = class_weight[1]
print(f'Postive weight:  {weight_for_1} \nNegative weight:  {weight_for_0}')

Total training samples:  316424 
positives:  79444 
negatives:  236980
Postive weight:  1.991490861487337 
Negative weight:  0.6676175204658622


In [5]:
# Compile model with PR metrics as well as accuracy 
# model.compile(loss='binary_crossentropy', optimizer='Nadam', metrics=[keras.metrics.BinaryAccuracy(name='accuracy'),
#       keras.metrics.Precision(name='precision'),
#       keras.metrics.Recall(name='recall')])

In [3]:
# Set up tensorboard stuff
import os
import time


def get_run_logdir(root_logdir=None):
    """Build a timestamped log directory path for a single training run.

    Args:
        root_logdir: Parent directory under which the run directory name is
            placed. When ``None`` (the default) this is ``my_logs`` inside
            ``os.curdir``, matching the previous hard-coded location.

    Returns:
        str: ``<root_logdir>/run_YYYY_MM_DD-HH_MM_SS``, stamped with the
        local time at which the function is called. Only the path is built;
        nothing is created on disk here.
    """
    if root_logdir is None:
        root_logdir = os.path.join(os.curdir, 'my_logs')
    # Second-level resolution: two calls within the same second yield the
    # same path.
    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(root_logdir, run_id)

# One TensorBoard callback, pointed at a log directory named when this cell runs.
tensorboard_cb = keras.callbacks.TensorBoard(
    log_dir=get_run_logdir(),
)

2021-11-26 10:13:52.043767: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-11-26 10:13:52.043843: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-11-26 10:13:52.052341: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [7]:
# fit model for 20 epochs

# history = model.fit(X_train_scaled, y_train, epochs=20, validation_data=(X_test_scaled, y_test), callbacks=[tensorboard_cb], class_weight=class_weight)

In [8]:
# from sklearn.metrics import roc_curve

# y_pred = model.predict(X_test_scaled).ravel()
# fpr, tpr, thresholds = roc_curve(y_test, y_pred)

In [9]:
# import matplotlib.pyplot as plt

# plt.plot(fpr, tpr)
# plt.plot(np.linspace(0,1,100), np.linspace(0,1,100), linestyle='dashed', linewidth=0.5, color='k')
# plt.xlabel('fpr')
# plt.ylabel('tpr')
# plt.savefig('ROC_basic_model.png', dpi=300)
# plt.show()

In [4]:
# try a more complicated model

# Wider variant of the network: same layout as before, but each of the four
# hidden SELU layers has 300 units instead of 100.
complex_model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=(11, 1)),
    *(
        keras.layers.Dense(
            300,
            activation='selu',
            kernel_initializer='lecun_normal',
        )
        for _ in range(4)
    ),
    keras.layers.Dense(1, activation='sigmoid'),
])

complex_model.summary()

2021-11-26 10:14:02.007188: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-11-26 10:14:02.009343: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-11-26 10:14:02.021693: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 11)                0         
_________________________________________________________________
dense (Dense)                (None, 300)               3600      
_________________________________________________________________
dense_1 (Dense)              (None, 300)               90300     
_________________________________________________________________
dense_2 (Dense)              (None, 300)               90300     
_________________________________________________________________
dense_3 (Dense)              (None, 300)               90300     
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 301       
Total params: 274,801
Trainable params: 274,801
Non-trainable params: 0
__________________________________________________

In [5]:
# Compile model with PR metrics as well as accuracy 
complex_model.compile(
    loss='binary_crossentropy',
    optimizer='Nadam',
    # Report precision and recall next to plain accuracy.
    metrics=[
        keras.metrics.BinaryAccuracy(name='accuracy'),
        keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall'),
    ],
)

In [6]:
# Checkpoint callback: writes the model to second_model.h5, keeping only the
# best one seen so far (save_best_only=True).
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    "second_model.h5",
    save_best_only=True,
)

# Train for 20 epochs with the class weights applied, validating on the test
# split and logging to TensorBoard.
history = complex_model.fit(
    X_train_scaled,
    y_train,
    epochs=20,
    validation_data=(X_test_scaled, y_test),
    callbacks=[tensorboard_cb, checkpoint_cb],
    class_weight=class_weight,
)

2021-11-26 10:15:16.183256: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-11-26 10:15:16.212960: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2394015000 Hz


Epoch 1/20
   2/9889 [..............................] - ETA: 46:01 - loss: 1.0269 - accuracy: 0.4453 - precision: 0.1270 - recall: 0.3651   

2021-11-26 10:15:20.526503: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-11-26 10:15:20.526592: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-11-26 10:15:20.660432: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.


   3/9889 [..............................] - ETA: 47:56 - loss: 1.0046 - accuracy: 0.4878 - precision: 0.1561 - recall: 0.4199

2021-11-26 10:15:20.702425: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-11-26 10:15:20.755000: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: ./my_logs/run_2021_11_26-10_13_52/train/plugins/profile/2021_11_26_10_15_20
2021-11-26 10:15:20.762998: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to ./my_logs/run_2021_11_26-10_13_52/train/plugins/profile/2021_11_26_10_15_20/sc01.dice.priv.trace.json.gz
2021-11-26 10:15:20.801002: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: ./my_logs/run_2021_11_26-10_13_52/train/plugins/profile/2021_11_26_10_15_20
2021-11-26 10:15:20.804265: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for memory_profile.json.gz to ./my_logs/run_2021_11_26-10_13_52/train/plugins/profile/2021_11_26_10_15_20/sc01.dice.priv.memory_profile.json.gz
2021-11-26 10:15:20.815973: 

 302/9889 [..............................] - ETA: 14:04 - loss: 0.7292 - accuracy: 0.6764 - precision: 0.4086 - recall: 0.6791

KeyboardInterrupt: 

In [14]:
# complex_model = keras.models.load_model('second_model.h5')
# complex_model.fit(X_train_scaled, y_train, epochs=10, validation_data=(X_test_scaled, y_test), callbacks=[tensorboard_cb, checkpoint_cb], class_weight=class_weight)