### This notebook demonstrates the alternative DCTR fitting method applied to Lund jet datasets

In [1]:
import os
# Restrict CUDA to device "3", with devices numbered in PCI bus order
os.environ.update({"CUDA_DEVICE_ORDER": "PCI_BUS_ID",
                   "CUDA_VISIBLE_DEVICES": "3"})

In [2]:
# standard library imports
from __future__ import absolute_import, division, print_function

import keras

# standard numerical library imports
import numpy as np
import scipy as sp

# energyflow imports
import energyflow as ef
from energyflow.archs import PFN
from energyflow.utils import data_split, remap_pids, to_categorical

import matplotlib.pyplot as plt

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
# Global plot settings
from matplotlib import rc
import matplotlib.font_manager
# Serif, LaTeX-rendered text with a 22 pt base font size
rc('font', family='serif', size=22)
rc('text', usetex=True)
# Tick labels and legend entries share one smaller size
for tick_group in ('xtick', 'ytick'):
    rc(tick_group, labelsize=15)
rc('legend', fontsize=15)

# Load Data

## Utilities

In [4]:
# normalize pT and center (y, phi)
def normalize(x):
    """Center (y, phi) on their pT-weighted average and normalize pT, in place.

    Arguments:
    - x : 2D array with one row per particle; column 0 is used as pT and
          columns 1:3 as (y, phi). Rows with pT <= 0 are left untouched.

    Returns:
    - None. x is modified in place. A jet without any pT > 0 row is left unchanged.
    """
    mask = x[:,0] > 0
    # With no selected rows the weights sum to zero and np.average raises
    # ZeroDivisionError, so there is nothing to center or normalize.
    if not mask.any():
        return
    yphi_avg = np.average(x[mask,1:3], weights=x[mask,0], axis=0)
    x[mask,1:3] -= yphi_avg
    # Divide the selected pT values by the sum over the whole pT column
    x[mask,0] /= x[:,0].sum()

In [5]:
def preprocess_data(X):
    """Normalize every jet of X in place, remap its particle IDs, and return X.

    Arguments:
    - X : iterable of jets; each jet is passed to normalize(), and X as a whole
          is passed to remap_pids with the PID stored in feature index 3.

    Returns:
    - X : the same object that was passed in, after the in-place updates.
    """
    # normalize() edits each jet in place, so no result needs to be collected
    for jet in X:
        normalize(jet)

    # Remap PIDs to unique values in range [0,1]
    remap_pids(X, pid_i=3)
    return X

# Build Model

In [6]:
# network architecture parameters: layer widths of the per-particle (Phi)
# and per-jet (F) networks of the PFN
Phi_sizes = (100, 100, 128)
F_sizes = (100, 100, 100)

# input_dim=7 per particle; the fitting section below builds its model with
# X_dim=7 and n_MC_params=3, i.e. 4 particle features plus 3 MC parameters
dctr = PFN(input_dim=7, Phi_sizes=Phi_sizes, F_sizes=F_sizes, summary=False)

# Load Model

In [7]:
# Load previously saved weights from the .h5 file into the Keras model wrapped by `dctr`
dctr.model.load_weights('./saved_models/DCTR_ee_dijets_1D_aLund.h5')

# Fitting

## Loss Curve Model

In [10]:
class AddParams2Input(keras.layers.Layer):
    """ Custom layer for tuning with DCTR: appends the MC parameters to every particle.

    Arguments:
    - n_MC_params : (int) - the number of MC parameters that are in X_dim
    - default_MC_params : (list of floats) - default values for each of the MC parameters
    - trainable_MC_params : (list of booleans) - True for parameters that you want to fit, False for parameters that should be fixed at their default value

    Usage: 
    Let X_dim be the input dimension of each particle to a PFN model, and n_MC_params be the number of MC parameters. 
    Defines a Layer that takes in an array of dimension 
    (batch_size, padded_multiplicity, X_dim - n_MC_params)
    This layer appends the default_MC_params to each particle and makes them trainable or non-trainable based on trainable_MC_params.

    The lists passed to the constructor are copied, so they are never modified by this layer.
    """
    
    def __init__(self, n_MC_params, default_MC_params, trainable_MC_params):
        super(AddParams2Input, self).__init__()
        # Definitions
        self.n_MC_params = n_MC_params
        # Copy both lists: build() replaces self.MC_params, and the caller's
        # lists must keep holding plain floats / booleans afterwards.
        self.MC_params = list(default_MC_params)
        self.trainable_MC_params = list(trainable_MC_params)

    
    def build(self, input_shape):
        # Create one (1, 1) weight per MC parameter, initialized to its default
        # value and trainable or not as requested
        MC_param_weights = []
        for i in range(self.n_MC_params):
            MC_param_weights.append(self.add_weight(name='MC_param_{}'.format(i), 
                                                    shape=(1, 1),
                                                    initializer=keras.initializers.Constant(self.MC_params[i]),
                                                    trainable=self.trainable_MC_params[i]))
            
        # From here on self.MC_params is a single tensor holding all parameters
        # along its last axis
        self.MC_params = keras.backend.tf.concat(MC_param_weights, axis = -1)
        super(AddParams2Input, self).build(input_shape)
    
    def call(self, input):
        # Add MC params to each input particle (but not to the padded rows):
        # rows whose first feature is non-zero get the parameter values,
        # all other rows get zeros
        concat_input_and_params = keras.backend.tf.where(keras.backend.abs(input[...,0])>0,
                                                         self.MC_params*keras.backend.ones_like(input[...,0:self.n_MC_params]),
                                                         keras.backend.zeros_like(input[...,0:self.n_MC_params]))
        return keras.backend.concatenate([input, concat_input_and_params], -1)
    
    def compute_output_shape(self, input_shape):
        # Treats input_shape as (batch, features) -- presumably the shape this
        # layer is given when wrapped in TimeDistributed; TODO confirm
        return (input_shape[0], input_shape[1]+self.n_MC_params)

In [12]:
def get_DCTR_fit_model(DCTR_model, 
                       X_dim, 
                       n_MC_params, 
                       default_MC_params,
                       trainable_MC_params):
    """ 
    Wrap a trained DCTR model so that its MC-parameter inputs become fit parameters.
    
    Arguments:
    - DCTR_model : a PFN model that has been trained to continuously interpolate over the input MC dimensions
    - X_dim : (int) - the dimension of the input expected by DCTR_model
    - n_MC_params : (int) - the number of MC parameters that are in X_dim
    - default_MC_params : (list of floats) - default values for each of the MC parameters
    - trainable_MC_params : (list of booleans) - True for parameters that you want to fit, False for parameters that should be fixed at their default value

    Returns:
    - DCTR_fit_model: a compiled model that gradient descends only on the trainable MC parameters

    Side effect: every layer of DCTR_model.model is set to non-trainable.
    """
    
    # Sanity checks on the arguments
    assert X_dim >=n_MC_params, "X_dim must be larger than n_MC_params. X_dim includes the dimensionality of the 4-vector + number of MC parameters"
    assert n_MC_params == len(default_MC_params), "Dimension mismatch between n_MC_params and number of default MC parameters given. len(default_MC_params) must equal n_MC_params"
    assert n_MC_params == len(trainable_MC_params), "Dimension mismatch between n_MC_params and trainable_MC_params. len(trainable_MC_params) must equal n_MC_params."
    assert np.any(trainable_MC_params), "All parameters are set to non-trainable."
    
    # The fit model only receives the particle features; the MC parameters
    # are appended to each particle by the custom layer
    particle_input = keras.layers.Input((None, X_dim - n_MC_params))
    params_layer = AddParams2Input(n_MC_params, default_MC_params, trainable_MC_params)
    particles_with_params = keras.layers.TimeDistributed(params_layer, name='tdist')(particle_input)

    # Freeze the pre-trained network layer by layer
    for dctr_layer in DCTR_model.model.layers:
        dctr_layer.trainable = False
        
    # Reuse the graph and the weights of the DCTR model on the extended input
    dctr_output = DCTR_model.model(inputs = particles_with_params)

    # Assemble and compile the full model
    DCTR_fit_model = keras.models.Model(inputs = particle_input, outputs = dctr_output)
    DCTR_fit_model.compile(optimizer=keras.optimizers.Adam(lr=1e-4),
                           loss='categorical_crossentropy')
    return DCTR_fit_model

In [13]:
def set_MC_params(dctr_fit_model, MC_params):
    """Overwrite the MC-parameter weights of a DCTR fit model.

    Arguments:
    - dctr_fit_model : model whose layer at index 1 holds the MC-parameter weights
    - MC_params : sequence of scalars, one per MC parameter (any length)

    Each value becomes a (1, 1) float32 array; the arrays are handed to
    set_weights() in the order given.
    NOTE(review): the order must match the weight order of that layer, which is
    not necessarily the order of default_MC_params (presumably trainable
    weights are listed first) -- check against get_weights() before calling.
    """
    weights = [np.array([[value]], dtype=np.float32) for value in MC_params]
    dctr_fit_model.layers[1].set_weights(weights)

## Load Data

In [22]:
# NOTE(review): data_dir is not defined in any cell shown in this notebook --
# confirm it is set before this cell runs on a fresh kernel.
default_dataset = np.load(data_dir + 'test1D_default.npz')
unknown_dataset = np.load(data_dir + 'test1D_aLund.npz')

In [23]:
# Keep the first four features of every particle and preprocess both samples
X_default = preprocess_data(default_dataset['jet'][:,:,:4])
X_unknown = preprocess_data(unknown_dataset['jet'][:,:,:4])

# Class labels: 0 for the default sample, 1 for the unknown sample.
# Each label array is sized from its own sample so that X and Y stay aligned
# even when the two datasets contain different numbers of jets.
Y_default = np.zeros_like(X_default[:,0,0])
Y_unknown = np.ones_like(X_unknown[:,0,0])

In [35]:
# Stack the default sample on top of the unknown sample (along the jet axis)
X_fit = np.concatenate([X_default, X_unknown])

# Stack the labels the same way and one-hot encode them into two classes
Y_fit = to_categorical(np.concatenate([Y_default, Y_unknown]), num_classes=2)

In [36]:
# Shuffle X_fit and Y_fit together; with test=0 the second output of each pair
# is discarded (presumably empty -- confirm against energyflow's data_split).
# NOTE(review): no random seed is set in the visible cells, so the shuffle may
# differ between runs.
X_fit, _, Y_fit, _ = data_split(X_fit, Y_fit, test=0, shuffle=True)

## Tune with Gradient Descent

In [37]:
# Build the fit model on top of the pre-trained DCTR network
dctr_fit_model = get_DCTR_fit_model(dctr, 
                       X_dim =7, 
                       n_MC_params = 3, 
                       default_MC_params   = [0.1365, 0.68, 0.217], # default params for [alpha_s, aLund, StoUD]
                       trainable_MC_params = [False, True, False]) # Only train aLund

dctr_fit_model.summary()
# NOTE(review): the values below are ordered [aLund, alpha_s, StoUD], unlike
# default_MC_params above. The callbacks further down read get_weights()[0] as
# aLund, which suggests the layer lists its trainable weight first -- confirm
# before reordering these values.
set_MC_params(dctr_fit_model, [0.68, 0.1365, 0.217])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, None, 4)           0         
_________________________________________________________________
tdist (TimeDistributed)      (None, None, 7)           3         
_________________________________________________________________
model_1 (Model)              (None, 2)                 57130     
Total params: 57,133
Trainable params: 1
Non-trainable params: 57,132
_________________________________________________________________


In [38]:
# Show the trainable flag of the three top-level layers (input, tdist, DCTR model)
dctr_fit_model.layers[0].trainable,dctr_fit_model.layers[1].trainable,dctr_fit_model.layers[2].trainable

(False, True, True)

In [39]:
def _current_aLund():
    """Return the scalar stored in the first weight array of dctr_fit_model."""
    return dctr_fit_model.get_weights()[0][0][0]

# Print the current fit value at the end of every epoch
print_weights = keras.callbacks.LambdaCallback(
    on_epoch_end=lambda epoch, logs: print("aLund fit = ", _current_aLund()))

# History of fit values, starting from the initial value
fit_vals = [0.68]
append_weights = keras.callbacks.LambdaCallback(
    on_epoch_end=lambda epoch, logs: fit_vals.append(_current_aLund()))

callbacks = [print_weights, append_weights]



In [40]:
# Fit on the first tenth of the shuffled sample
n_fit = int(len(X_fit)/10)

for cycle in range(100):
    # Each cycle runs two one-epoch fits: first with only the parameter layer
    # (layers[1]) flagged trainable, then with only the DCTR model (layers[2])
    # flagged trainable. The model is recompiled with a fresh Adam optimizer
    # before every fit so the changed flags take effect.
    for params_trainable, dctr_trainable in ((True, False), (False, True)):
        dctr_fit_model.layers[0].trainable = False
        dctr_fit_model.layers[1].trainable = params_trainable
        dctr_fit_model.layers[2].trainable = dctr_trainable

        dctr_fit_model.compile(optimizer=keras.optimizers.Adam(lr=1e-4),
                               loss='categorical_crossentropy')

        dctr_fit_model.fit(X_fit[:n_fit], Y_fit[:n_fit],
                           epochs=1,
                           batch_size=10000,
                           callbacks=callbacks)

Epoch 1/1
aLund fit =  0.6816125
Epoch 1/1
aLund fit =  0.6816125
Epoch 1/1
aLund fit =  0.68334705
Epoch 1/1
aLund fit =  0.68334705
Epoch 1/1
aLund fit =  0.6850707
Epoch 1/1
aLund fit =  0.6850707
Epoch 1/1
aLund fit =  0.68675596
Epoch 1/1
aLund fit =  0.68675596
Epoch 1/1
aLund fit =  0.6884318
Epoch 1/1
aLund fit =  0.6884318
Epoch 1/1
aLund fit =  0.6900476
Epoch 1/1
aLund fit =  0.6900476
Epoch 1/1
aLund fit =  0.69180214
Epoch 1/1
aLund fit =  0.69180214
Epoch 1/1
aLund fit =  0.6934119
Epoch 1/1
aLund fit =  0.6934119
Epoch 1/1
aLund fit =  0.69501805
Epoch 1/1
aLund fit =  0.69501805
Epoch 1/1
aLund fit =  0.69668305
Epoch 1/1
aLund fit =  0.69668305
Epoch 1/1
aLund fit =  0.69830906
Epoch 1/1
aLund fit =  0.69830906
Epoch 1/1
aLund fit =  0.69995576
Epoch 1/1
aLund fit =  0.69995576
Epoch 1/1
aLund fit =  0.7015646
Epoch 1/1
aLund fit =  0.7015646
Epoch 1/1
aLund fit =  0.7030521
Epoch 1/1
aLund fit =  0.7030521
Epoch 1/1
aLund fit =  0.70479673
Epoch 1/1
aLund fit =  0.704

In [None]:
# Continue the alternating fit for another 100 cycles on the first tenth of
# the shuffled sample
n_fit = int(len(X_fit)/10)

for cycle in range(100):
    # Each cycle runs two one-epoch fits: first with only the parameter layer
    # (layers[1]) flagged trainable, then with only the DCTR model (layers[2])
    # flagged trainable. The model is recompiled with a fresh Adam optimizer
    # before every fit so the changed flags take effect.
    for params_trainable, dctr_trainable in ((True, False), (False, True)):
        dctr_fit_model.layers[0].trainable = False
        dctr_fit_model.layers[1].trainable = params_trainable
        dctr_fit_model.layers[2].trainable = dctr_trainable

        dctr_fit_model.compile(optimizer=keras.optimizers.Adam(lr=1e-4),
                               loss='categorical_crossentropy')

        dctr_fit_model.fit(X_fit[:n_fit], Y_fit[:n_fit],
                           epochs=1,
                           batch_size=10000,
                           callbacks=callbacks)

Epoch 1/1
aLund fit =  0.7748053
Epoch 1/1
aLund fit =  0.7748053
Epoch 1/1
aLund fit =  0.7748424
Epoch 1/1
aLund fit =  0.7748424
Epoch 1/1
aLund fit =  0.77477103
Epoch 1/1
aLund fit =  0.77477103
Epoch 1/1
aLund fit =  0.774617
Epoch 1/1
aLund fit =  0.774617
Epoch 1/1
aLund fit =  0.7741826
Epoch 1/1
aLund fit =  0.7741826
Epoch 1/1
aLund fit =  0.7736157
Epoch 1/1
aLund fit =  0.7736157
Epoch 1/1
aLund fit =  0.7741639
Epoch 1/1
aLund fit =  0.7741639
Epoch 1/1
aLund fit =  0.7743515
Epoch 1/1
aLund fit =  0.7743515
Epoch 1/1
aLund fit =  0.7743987
Epoch 1/1
aLund fit =  0.7743987
Epoch 1/1
aLund fit =  0.7745927
Epoch 1/1
aLund fit =  0.7745927
Epoch 1/1
aLund fit =  0.7747601
Epoch 1/1
aLund fit =  0.7747601
Epoch 1/1
aLund fit =  0.77482027
Epoch 1/1
aLund fit =  0.77482027
Epoch 1/1
aLund fit =  0.7748107
Epoch 1/1
aLund fit =  0.7748107
Epoch 1/1
aLund fit =  0.77449423
Epoch 1/1
aLund fit =  0.77449423
Epoch 1/1
aLund fit =  0.7742807
Epoch 1/1
aLund fit =  0.7742807
Epoch 