In [10]:
import numpy as np
from tensorflow import keras

# Load the pre-scaled train/test split; the loaded object is unpacked into four
# arrays.  allow_pickle=True is passed -- presumably the four arrays are stored
# together as one pickled object array (TODO confirm).  Unpickling can execute
# arbitrary code, so only load files from a trusted source.
X_train_scaled, X_test_scaled, y_train, y_test = np.load(
    'SL_Higgs_input_data.npy',
    allow_pickle=True,
)


In [17]:
# Build NN using Keras sequential API
from tensorflow import keras

# Binary classifier: flatten the input, four identical 100-unit SELU hidden
# layers (LeCun-normal initialised), then a single sigmoid output unit.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=(11, 1)),
    *(
        keras.layers.Dense(100, activation='selu', kernel_initializer='lecun_normal')
        for _ in range(4)
    ),
    keras.layers.Dense(1, activation='sigmoid'),
])
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_4 (Flatten)          (None, 11)                0         
_________________________________________________________________
dense_12 (Dense)             (None, 100)               1200      
_________________________________________________________________
dense_13 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_14 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_15 (Dense)             (None, 100)               10100     
_________________________________________________________________
dense_16 (Dense)             (None, 1)                 101       
Total params: 31,601
Trainable params: 31,601
Non-trainable params: 0
__________________________________________________

In [None]:
class Model:
    """Bundle a Keras-style model with its data split and balanced class weights.

    On construction the train/test arrays are loaded from ``data_path`` and a
    per-class weight dictionary is computed from the training labels so that
    both classes contribute equally to the loss in total.

    Attributes:
        X_train, X_test, y_train, y_test: the four arrays unpacked from the file.
        model: the wrapped model object; it must expose a Keras-compatible
            ``fit`` method.
        weights: ``{0: weight_for_0, 1: weight_for_1}`` as passed to
            ``fit(class_weight=...)``.
        history: return value of the most recent ``fit`` call, or ``None``.
    """

    def __init__(self, model, data_path='SL_Higgs_input_data.npy'):
        """Load the data split and compute class weights.

        Args:
            model: model object to train (required, so it comes before the
                defaulted ``data_path``).
            data_path: path to a ``.npy`` file that unpacks into
                ``(X_train, X_test, y_train, y_test)``.

        Raises:
            ValueError: if the training labels contain only one class, because
                the balancing formula would divide by zero.
        """
        # allow_pickle=True matches how the same file is loaded elsewhere in this
        # notebook.  Unpickling can run arbitrary code: only load trusted files.
        data = np.load(data_path, allow_pickle=True)
        self.X_train, self.X_test, self.y_train, self.y_test = data
        self.model = model
        self.history = None

        tot = len(self.y_train)
        pos = np.sum(self.y_train == 1)
        neg = tot - pos
        if pos == 0 or neg == 0:
            raise ValueError(
                'Training labels must contain both classes to compute class '
                f'weights (positives={pos}, negatives={neg}).'
            )

        # weight * class_count == tot / 2 for each class, so the rarer class
        # receives the larger weight.
        weight_for_0 = (1 / neg) * (tot / 2.0)
        weight_for_1 = (1 / pos) * (tot / 2.0)
        self.weights = {0: weight_for_0, 1: weight_for_1}

    def fit(self, epochs=50, **fit_kwargs):
        """Train the wrapped model on the stored training split.

        The test split is used as validation data and the class weights computed
        in ``__init__`` are applied.

        Args:
            epochs: number of epochs forwarded to ``model.fit``.
            **fit_kwargs: extra keyword arguments forwarded unchanged to
                ``model.fit`` (for example ``callbacks``).

        Returns:
            Whatever ``model.fit`` returns; the same object is stored on
            ``self.history``.
        """
        self.history = self.model.fit(
            self.X_train,
            self.y_train,
            epochs=epochs,
            validation_data=(self.X_test, self.y_test),
            class_weight=self.weights,
            **fit_kwargs,
        )
        return self.history

        




In [18]:
# The dataset is imbalanced, so the signal and background training examples
# are given different weights.
tot = len(y_train)
pos = np.sum(y_train == 1)
neg = tot - pos
print(f'Total training samples:  {tot} \npositives:  {pos} \nnegatives:  {neg}')

# Each class weight satisfies weight * class_count == tot / 2, so the class
# with fewer examples gets the larger weight.
class_weight = {
    label: (1 / count) * (tot / 2.0)
    for label, count in ((0, neg), (1, pos))
}
weight_for_0, weight_for_1 = class_weight[0], class_weight[1]
print(f'Postive weight:  {weight_for_1} \nNegative weight:  {weight_for_0}')

Total training samples:  316424 
positives:  79444 
negatives:  236980
Postive weight:  1.991490861487337 
Negative weight:  0.6676175204658622


In [19]:
# Compile with binary cross-entropy loss and the Nadam optimizer; report
# precision and recall in addition to accuracy.
model.compile(
    loss='binary_crossentropy',
    optimizer='Nadam',
    metrics=[
        keras.metrics.BinaryAccuracy(name='accuracy'),
        keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall'),
    ],
)

In [15]:
# Set up tensorboard stuff
import os
import time


def get_run_logdir():
    """Return a timestamped log path of the form ``./my_logs/run_<date>-<time>``.

    The last path component is built with ``time.strftime`` at the moment of
    the call, so successive calls in different seconds give different paths.
    The function only builds the path string.
    """
    timestamp = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(os.curdir, 'my_logs', timestamp)

# The log directory is chosen once, when this cell runs; every fit() call that
# reuses this callback object is given that same directory.
tensorboard_cb = keras.callbacks.TensorBoard(log_dir=get_run_logdir())

2021-11-06 22:31:08.126501: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-11-06 22:31:08.126536: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-11-06 22:31:08.126589: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


In [20]:
# Train for 10 epochs with the class weights applied, validating on the test
# split and logging through the TensorBoard callback.
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=10,
    validation_data=(X_test_scaled, y_test),
    class_weight=class_weight,
    callbacks=[tensorboard_cb],
)

Epoch 1/10
   2/9889 [..............................] - ETA: 29:41 - loss: 0.9690 - accuracy: 0.5859 - precision: 0.3333 - recall: 0.4298  

2021-11-06 22:35:53.096365: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-11-06 22:35:53.102813: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.


  16/9889 [..............................] - ETA: 7:01 - loss: 0.7251 - accuracy: 0.6556 - precision: 0.4267 - recall: 0.6644 

2021-11-06 22:35:53.370284: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-11-06 22:35:53.376009: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-11-06 22:35:53.383023: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: ./my_logs/run_2021_11_06-22_31_08/train/plugins/profile/2021_11_06_22_35_53
2021-11-06 22:35:53.404864: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to ./my_logs/run_2021_11_06-22_31_08/train/plugins/profile/2021_11_06_22_35_53/sc01.dice.priv.trace.json.gz
2021-11-06 22:35:53.415817: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: ./my_logs/run_2021_11_06-22_31_08/train/plugins/profile/2021_11_06_22_35_53
2021-11-06 22:35:53.418940: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for memory_profile.json.gz to ./my_logs/run_2021_11_06-

Epoch 2/10
Epoch 3/10

KeyboardInterrupt: 

In [9]:
# Same 10-epoch training call as above, but the returned history is not kept.
model.fit(
    X_train_scaled,
    y_train,
    epochs=10,
    validation_data=(X_test_scaled, y_test),
    class_weight=class_weight,
    callbacks=[tensorboard_cb],
)

Epoch 1/10
   2/9889 [..............................] - ETA: 52:11 - loss: 0.5684 - accuracy: 0.7031 - precision: 0.3478 - recall: 0.6667

2021-11-05 19:06:50.434437: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-11-05 19:06:50.458918: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.


  14/9889 [..............................] - ETA: 9:29 - loss: 0.5821 - accuracy: 0.6875 - precision: 0.4181 - recall: 0.6667 

2021-11-05 19:06:50.733048: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-11-05 19:06:50.736866: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-11-05 19:06:50.750220: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: ./my_logs/run_2021_11_05-18_16_11/train/plugins/profile/2021_11_05_19_06_50
2021-11-05 19:06:50.784969: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to ./my_logs/run_2021_11_05-18_16_11/train/plugins/profile/2021_11_05_19_06_50/sc01.dice.priv.trace.json.gz
2021-11-05 19:06:50.792293: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: ./my_logs/run_2021_11_05-18_16_11/train/plugins/profile/2021_11_05_19_06_50
2021-11-05 19:06:50.795563: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for memory_profile.json.gz to ./my_logs/run_2021_11_05-

Epoch 2/10
Epoch 3/10
1054/9889 [==>...........................] - ETA: 2:43 - loss: 0.5266 - accuracy: 0.7348 - precision: 0.4814 - recall: 0.7390

In [1]:
# Try a wider network: same layout as the first model, but each of the four
# SELU hidden layers has 300 units.
complex_model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=(11, 1)),
    *(
        keras.layers.Dense(300, activation='selu', kernel_initializer='lecun_normal')
        for _ in range(4)
    ),
    keras.layers.Dense(1, activation='sigmoid'),
])

complex_model.summary()

NameError: name 'keras' is not defined

In [20]:
# Compile the wider model with the same loss, optimizer and metrics
# (accuracy, precision, recall).
complex_model.compile(
    loss='binary_crossentropy',
    optimizer='Nadam',
    metrics=[
        keras.metrics.BinaryAccuracy(name='accuracy'),
        keras.metrics.Precision(name='precision'),
        keras.metrics.Recall(name='recall'),
    ],
)

In [10]:
# Checkpoint callback writing to second_model.h5; save_best_only=True is set
# so that only the best model is kept.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath="second_model.h5",
    save_best_only=True,
)

# history = complex_model.fit(X_train_scaled, y_train, epochs=10, validation_data=(X_test_scaled, y_test), callbacks=[tensorboard_cb, checkpoint_cb], class_weight=class_weight)

In [11]:
# Reload the checkpointed model from disk and train it for 10 more epochs,
# logging to TensorBoard and checkpointing as it goes.
complex_model = keras.models.load_model('second_model.h5')
complex_model.fit(
    X_train_scaled,
    y_train,
    epochs=10,
    validation_data=(X_test_scaled, y_test),
    class_weight=class_weight,
    callbacks=[tensorboard_cb, checkpoint_cb],
)

2021-11-04 20:55:11.034406: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-11-04 20:55:11.047740: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2394015000 Hz


Epoch 1/10
   9/9889 [..............................] - ETA: 7:18 - loss: 0.5996 - accuracy: 0.6840 - precision: 0.4646 - recall: 0.7195 

2021-11-04 20:55:13.107568: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2021-11-04 20:55:13.107616: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2021-11-04 20:55:13.131710: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2021-11-04 20:55:13.140149: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2021-11-04 20:55:13.158913: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: ./my_logs/run_2021_11_04-20_54_52/train/plugins/profile/2021_11_04_20_55_13
2021-11-04 20:55:13.165648: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to ./my_logs/run_2021_11_04-20_54_52/train/plugins/profile/2021_11_04_20_55_13/sc01.dice.priv.trace.json.gz
2021-11-04 20:55:13.193380: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: ./my_logs/run_2021

