In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import os
import keras
import tensorflow as tf 
print(tf.__version__)
print(keras.__version__)
print(pd.__version__)
print(np.__version__)
import re
import glob
# Script Imports
from nn_globals import *
from nn_plotting import gaus, fit_gaus, corr_plot
from dataset import muon_data_split
from nn_evaluate import huber_loss
from nn_training import train_model, lr_schedule
# Keras/TF import
from keras.models import Model, load_model
from keras.regularizers import L1L2
from tensorflow.keras.optimizers import Adam
from keras.layers import Input, BatchNormalization, Dense, Activation
from keras.callbacks import LearningRateScheduler, TerminateOnNaN, ModelCheckpoint

2.6.0
2.6.0
1.3.4
1.19.5


In [2]:
# Import muon data and split it into training and testing samples.
# 'x' is the array of input variables, 'y' is the q/pT; the 'dxy' arrays are
# returned alongside them by muon_data_split.
(
    x_train_displ,
    x_test_displ,
    y_train_displ,
    y_test_displ,
    dxy_train_displ,
    dxy_test_displ,
) = muon_data_split(
    filename=DATAFILEPATH,
    reg_pt_scale=REG_PT_SCALE,
    reg_dxy_scale=REG_DXY_SCALE,
    test_size=TEST_SIZE,
    nvariables=NVARIABLES,
    nentries=NENTRIES,
    batch_size=128,
)


[INFO    ] Loading muon data from ./data/NN_input_params_FlatXYZ.npz ...
[INFO    ] Loaded the variables with shape (19300000, 25)
[INFO    ] Loaded the parameters with shape (19300000, 6)
[INFO    ] Loaded the encoded variables with shape (3284620, 23)
[INFO    ] Loaded the encoded parameters with shape (3284620,)
[INFO    ] Loaded # of training and testing events: (2249964, 1034656)


In [3]:
def _hidden_block(x, index, nodes, initializer, regularizer, eps, bn_momentum):
    """Append one bias-free Dense -> BatchNormalization -> tanh block to ``x``.

    The layers are named ``hidden-dense-<index>``, ``bn-<index>`` and
    ``act_<index>``.
    """
    x = Dense(nodes,
              kernel_initializer=initializer,
              use_bias=False,  # the BatchNormalization that follows supplies the offset
              kernel_regularizer=regularizer,
              name="hidden-dense-{}".format(index))(x)
    x = BatchNormalization(epsilon=eps, momentum=bn_momentum,
                           name="bn-{}".format(index))(x)
    return Activation(activation="tanh", name="act_{}".format(index))(x)


def create_model(nvariables, lr=0.001, clipnorm=10., initializer = "glorot_uniform",
                nodes1=64, nodes2=32, nodes3=16, outnodes=2,
                l1_reg = 0.0, l2_reg = 0.0):
    """Build and compile the fully connected "baseline-model" network.

    Architecture: input BatchNormalization, then up to three
    Dense -> BatchNormalization -> tanh blocks, then a linear Dense output.

    Parameters
    ----------
    nvariables : int
        Number of input features per sample.
    lr : float
        Learning rate handed to the Adam optimizer.
    clipnorm : float
        Gradient-norm clipping value handed to the Adam optimizer.
    initializer : str
        Kernel initializer used by every Dense layer.
    nodes1, nodes2, nodes3 : int
        Widths of the hidden layers. The first hidden layer is always built.
        A falsy ``nodes2`` drops the second and the third hidden layers;
        a falsy ``nodes3`` drops only the third.
    outnodes : int
        Number of output nodes.
    l1_reg, l2_reg : float
        L1 / L2 kernel regularization strengths applied to the hidden layers.

    Returns
    -------
    keras.models.Model
        The compiled model. Its summary is printed as a side effect.
    """
    regularizer = L1L2(l1=l1_reg, l2=l2_reg)
    bn_momentum = 0.9
    eps = 1e-4

    # The first hidden layer is unconditional; deeper ones stop at the first
    # falsy width, so nodes3 is only honoured when nodes2 is set.
    hidden_sizes = [nodes1]
    for nodes in (nodes2, nodes3):
        if not nodes:
            break
        hidden_sizes.append(nodes)

    x = x_in = Input((nvariables,))
    x = BatchNormalization(epsilon=eps, momentum=bn_momentum, name="bn-input")(x)

    for index, nodes in enumerate(hidden_sizes, start=1):
        x = _hidden_block(x, index, nodes, initializer, regularizer, eps, bn_momentum)

    x = Dense(outnodes, kernel_initializer=initializer, name="dense-output")(x)
    x = Activation("linear")(x)

    model = Model(inputs=x_in, outputs=x, name="baseline-model")

    # `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
    adam = Adam(learning_rate=lr, clipnorm=clipnorm)
    # NOTE(review): 'acc' is kept so the training-history keys stay unchanged,
    # although accuracy looks uninformative for a linear regression output.
    model.compile(optimizer=adam,
                  loss=huber_loss,
                  metrics=['acc', 'mse', 'mae'])
    model.summary()

    return model
    

In [None]:
# The rest of this cell expects Keras to be running on the TensorFlow backend.
assert keras.backend.backend() == 'tensorflow'

# --- Training hyper-parameters -------------------------------------------
normal_epochs = 300
normal_batch_size = 2000
l1_reg = 0.0
l2_reg = 0.0
learning_rate = 0.0063
gradient_clip_norm = 100.

# --- Callbacks -----------------------------------------------------------
# Learning-rate schedule, abort on NaN loss, and a checkpoint every 10 epochs.
lr_decay = LearningRateScheduler(lr_schedule, verbose=1)
terminate_on_nan = TerminateOnNaN()
model_training_checkpoint = ModelCheckpoint(
    filepath="checkpoints/model_ckpt_epoch_{epoch:02d}.hdf5",
    monitor="val_loss",
    verbose=1,
    period=10,
)
training_callbacks = [lr_decay, terminate_on_nan, model_training_checkpoint]

# --- Model ---------------------------------------------------------------
model = create_model(
    nvariables=NVARIABLES,
    lr=learning_rate,
    clipnorm=gradient_clip_norm,
    nodes1=10,
    nodes2=7,
    nodes3=5,
    outnodes=2,
    l1_reg=l1_reg,
    l2_reg=l2_reg,
)

logger.info('Training model with l1_reg: {0} l2_reg: {1}'.format(l1_reg, l2_reg))

# --- Training ------------------------------------------------------------
# The two regression targets are stacked column-wise to match outnodes=2.
train_targets = np.column_stack((y_train_displ, dxy_train_displ))
model, history = train_model(
    model,
    x_train_displ,
    train_targets,
    save_model=False,
    epochs=normal_epochs,
    batch_size=normal_batch_size,
    callbacks=training_callbacks,
    validation_split=0.1,
    verbose=True,
)

# --- Final summary -------------------------------------------------------
# Report the number of recorded epochs (printed twice by the format string)
# together with the last training and validation loss.
loss_history = history.history['loss']
val_loss_history = history.history['val_loss']
metrics = [len(loss_history), loss_history[-1], val_loss_history[-1]]
logger.info('Epoch {0}/{0} - loss: {1} - val_loss: {2}'.format(*metrics))

2022-01-11 10:52:05.498337: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-01-11 10:52:05.499529: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
[INFO    ] Training model with l1_reg: 0.0 l2_reg: 0.0
[INFO    ] Begin training ...


Metal device set to: Apple M1
Model: "baseline-model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 23)]              0         
_________________________________________________________________
bn-input (BatchNormalization (None, 23)                92        
_________________________________________________________________
hidden-dense-1 (Dense)       (None, 10)                230       
_________________________________________________________________
bn-1 (BatchNormalization)    (None, 10)                40        
_________________________________________________________________
act_1 (Activation)           (None, 10)                0         
_________________________________________________________________
hidden-dense-2 (Dense)       (None, 7)                 70        
_________________________________________________________________
bn-2 (BatchNormalizati

2022-01-11 10:52:05.769980: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2022-01-11 10:52:05.772927: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 1/300

Epoch 00001: LearningRateScheduler setting learning rate to 0.006300000008195639.


2022-01-11 10:52:06.107368: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.




2022-01-11 10:52:22.272572: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/300

Epoch 00002: LearningRateScheduler setting learning rate to 0.006300000008195639.
Epoch 3/300

Epoch 00003: LearningRateScheduler setting learning rate to 0.006300000008195639.
Epoch 4/300

Epoch 00004: LearningRateScheduler setting learning rate to 0.006300000008195639.
Epoch 5/300

Epoch 00005: LearningRateScheduler setting learning rate to 0.006300000008195639.
Epoch 6/300

Epoch 00006: LearningRateScheduler setting learning rate to 0.006300000008195639.
Epoch 7/300

Epoch 00007: LearningRateScheduler setting learning rate to 0.006300000008195639.
Epoch 8/300

Epoch 00008: LearningRateScheduler setting learning rate to 0.006300000008195639.
Epoch 9/300

Epoch 00009: LearningRateScheduler setting learning rate to 0.006300000008195639.
Epoch 10/300

Epoch 00010: LearningRateScheduler setting learning rate to 0.006300000008195639.

Epoch 00010: saving model to checkpoints/model_ckpt_epoch_10.hdf5
Epoch 11/300

Epoch 00011: LearningRateScheduler setting learning rate to 0.0

Epoch 30/300

Epoch 00030: LearningRateScheduler setting learning rate to 0.005102999974042177.

Epoch 00030: saving model to checkpoints/model_ckpt_epoch_30.hdf5
Epoch 31/300

Epoch 00031: LearningRateScheduler setting learning rate to 0.004592699976637959.
Epoch 32/300

Epoch 00032: LearningRateScheduler setting learning rate to 0.004592699930071831.
Epoch 33/300

Epoch 00033: LearningRateScheduler setting learning rate to 0.004592699930071831.
Epoch 34/300

Epoch 00034: LearningRateScheduler setting learning rate to 0.004592699930071831.
Epoch 35/300

Epoch 00035: LearningRateScheduler setting learning rate to 0.004592699930071831.
Epoch 36/300

Epoch 00036: LearningRateScheduler setting learning rate to 0.004592699930071831.
Epoch 37/300

Epoch 00037: LearningRateScheduler setting learning rate to 0.004592699930071831.
Epoch 38/300

Epoch 00038: LearningRateScheduler setting learning rate to 0.004592699930071831.
Epoch 39/300

Epoch 00039: LearningRateScheduler setting learning rat

Epoch 57/300

Epoch 00057: LearningRateScheduler setting learning rate to 0.0037200867664068937.
Epoch 58/300

Epoch 00058: LearningRateScheduler setting learning rate to 0.0037200867664068937.
Epoch 59/300

Epoch 00059: LearningRateScheduler setting learning rate to 0.0037200867664068937.
Epoch 60/300

Epoch 00060: LearningRateScheduler setting learning rate to 0.0037200867664068937.

Epoch 00060: saving model to checkpoints/model_ckpt_epoch_60.hdf5
Epoch 61/300

Epoch 00061: LearningRateScheduler setting learning rate to 0.0033480780897662044.
Epoch 62/300

Epoch 00062: LearningRateScheduler setting learning rate to 0.0033480781130492687.
Epoch 63/300

Epoch 00063: LearningRateScheduler setting learning rate to 0.0033480781130492687.
Epoch 64/300

Epoch 00064: LearningRateScheduler setting learning rate to 0.0033480781130492687.
Epoch 65/300

Epoch 00065: LearningRateScheduler setting learning rate to 0.0033480781130492687.
Epoch 66/300

Epoch 00066: LearningRateScheduler setting lea

Epoch 85/300

Epoch 00085: LearningRateScheduler setting learning rate to 0.002711943117901683.
Epoch 86/300

Epoch 00086: LearningRateScheduler setting learning rate to 0.002711943117901683.
Epoch 87/300

Epoch 00087: LearningRateScheduler setting learning rate to 0.002711943117901683.
Epoch 88/300

Epoch 00088: LearningRateScheduler setting learning rate to 0.002711943117901683.
Epoch 89/300

Epoch 00089: LearningRateScheduler setting learning rate to 0.002711943117901683.
Epoch 90/300

Epoch 00090: LearningRateScheduler setting learning rate to 0.002711943117901683.

Epoch 00090: saving model to checkpoints/model_ckpt_epoch_90.hdf5
Epoch 91/300

Epoch 00091: LearningRateScheduler setting learning rate to 0.0024407488061115147.
Epoch 92/300

Epoch 00092: LearningRateScheduler setting learning rate to 0.0024407487362623215.
Epoch 93/300

Epoch 00093: LearningRateScheduler setting learning rate to 0.0024407487362623215.
Epoch 94/300

Epoch 00094: LearningRateScheduler setting learning 

Epoch 112/300

Epoch 00112: LearningRateScheduler setting learning rate to 0.0019770064391195774.
Epoch 113/300

Epoch 00113: LearningRateScheduler setting learning rate to 0.0019770064391195774.
Epoch 114/300

Epoch 00114: LearningRateScheduler setting learning rate to 0.0019770064391195774.
Epoch 115/300

Epoch 00115: LearningRateScheduler setting learning rate to 0.0019770064391195774.
Epoch 116/300

Epoch 00116: LearningRateScheduler setting learning rate to 0.0019770064391195774.
Epoch 117/300

Epoch 00117: LearningRateScheduler setting learning rate to 0.0019770064391195774.
Epoch 118/300

Epoch 00118: LearningRateScheduler setting learning rate to 0.0019770064391195774.
Epoch 119/300

Epoch 00119: LearningRateScheduler setting learning rate to 0.0019770064391195774.
Epoch 120/300

Epoch 00120: LearningRateScheduler setting learning rate to 0.0019770064391195774.

Epoch 00120: saving model to checkpoints/model_ckpt_epoch_120.hdf5
Epoch 121/300

Epoch 00121: LearningRateScheduler 