### Part-I: Importing Required Modules/Packages

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import tensorflow as tf 
import keras
# Record the versions of the core libraries (TensorFlow, Keras, pandas, NumPy)
# so that the results below can be tied to a specific environment.
for _lib in (tf, keras, pd, np):
    print(_lib.__version__)

2.6.0
2.6.0
1.3.4
1.19.5


In [2]:
# custom declarations 
from nn_globals import *
from nn_plotting import gaus, fit_gaus, corr_plot

### Part- II: Data Preprocessing

In [3]:
def _handle_nan_in_x(x):
    x[np.isnan(x)] = 0.0
    x[x==-999.0] = 0.0
    return x

def _zero_out_x(x):
    x = 0.0
    return x
    
def _fixME1Ring(x):
    for i in range(len(x)):
        if (x[i,0] != 0.0): x[i,18] = x[i,18] + 1
    return x   

def muon_data(filename, reg_pt_scale=1.0, reg_dxy_scale=1.0, correct_for_eta=False):
    """Load muon input features and regression targets from an ``.npz`` file.

    The archive must contain the arrays ``variables`` and ``parameters``.
    Only the first ``nentries`` rows of each are used. NaN and -999.0
    entries are replaced by zero, and rows are kept only when column 23 of
    ``variables`` equals 11, 13, 14 or 15.

    Parameters
    ----------
    filename : str
        Path of the ``.npz`` archive.
    reg_pt_scale : float
        Multiplicative factor applied to column 0 of ``parameters``.
    reg_dxy_scale : float
        Accepted for interface compatibility; not used by this function.
    correct_for_eta : bool
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    x : ndarray
        Columns 0..22 of the selected ``variables`` rows.
    y : ndarray
        ``reg_pt_scale`` times column 0 of the selected ``parameters`` rows.
    dxy : ndarray
        ``vy*cos(phi) - vx*sin(phi)`` built from ``parameters`` columns
        1 (phi), 3 (vx) and 4 (vy).

    Raises
    ------
    Exception
        Whatever loading the file raised (missing file, missing key, ...).
        The failure is logged and then re-raised, instead of surfacing later
        as an unrelated "variable referenced before assignment" error.
    """
    logger.info('Loading muon data from {0} ...'.format(filename))
    try:
        # The context manager closes the underlying archive once both arrays
        # have been read into memory.
        with np.load(filename) as loaded:
            the_variables = loaded['variables'][:nentries]
            the_parameters = loaded['parameters'][:nentries]
    except Exception:
        logger.error('Failed to load data from file: {0}'.format(filename))
        raise
    logger.info('Loaded the variables with shape {0}'.format(the_variables.shape))
    logger.info('Loaded the parameters with shape {0}'.format(the_parameters.shape))

    assert the_variables.shape[0] == the_parameters.shape[0]
    _handle_nan_in_x(the_variables)
    #_fixME1Ring(the_variables)
    _handle_nan_in_x(the_parameters)

    # Keep only rows whose column 23 is one of the accepted codes.
    # NOTE(review): presumably this column is the track mode - confirm.
    mask = np.isin(the_variables[:, 23], (11, 13, 14, 15))
    the_variables = the_variables[mask]
    the_parameters = the_parameters[mask]
    assert the_variables.shape[0] == the_parameters.shape[0]

    x = the_variables[:, 0:23]
    y = reg_pt_scale * the_parameters[:, 0]
    phi = the_parameters[:, 1]
    vx = the_parameters[:, 3]
    vy = the_parameters[:, 4]
    dxy = vy * np.cos(phi) - vx * np.sin(phi)
    logger.info('Loaded the encoded variables with shape {0}'.format(x.shape))
    logger.info('Loaded the encoded parameters with shape {0}'.format(y.shape))

    return x, y, dxy

def muon_data_split(filename, reg_pt_scale=1.0, reg_dxy_scale=1.0, test_size=0.5, correct_for_eta=False, random_state=None):
    """Load the muon data and split it into training and testing parts.

    Parameters
    ----------
    filename, reg_pt_scale, reg_dxy_scale, correct_for_eta
        Forwarded unchanged to ``muon_data``.
    test_size : float
        Passed to ``train_test_split`` as ``test_size``.
    random_state : int or None
        Passed to ``train_test_split``. The default ``None`` keeps the
        previous behaviour (a different shuffle on every call); pass an
        integer to make the split reproducible.

    Returns
    -------
    x_train, x_test, y_train, y_test, dxy_train, dxy_test

    Besides splitting, the function logs a warning when the last training
    batch (batch size 128, after holding out 10% for validation) would
    contain fewer than 100 events. The check is done once for the training
    set as is and once for twice its size.
    """
    x, y, dxy = muon_data(filename, reg_pt_scale=reg_pt_scale, reg_dxy_scale=reg_dxy_scale, correct_for_eta=correct_for_eta)

    # Split dataset in training and testing
    x_train, x_test, y_train, y_test, dxy_train, dxy_test = train_test_split(
        x, y, dxy, test_size=test_size, random_state=random_state)
    logger.info('Loaded # of training and testing events: {0}'.format((x_train.shape[0], x_test.shape[0])))

    # Check for cases where the number of events in the last batch could be too few
    validation_split = 0.1
    batch_size = 128
    min_last_batch = 100
    kept_fraction = 1.0 - validation_split
    # (size multiplier, warning message, hint message) for each check; the
    # second entry covers the doubled sample count the messages call "mixing".
    checks = (
        (1,
         'The last batch for training could be too few! ({0}%{1})={2}. Please change test_size.',
         'Try this formula: int(int({0}*{1})*{2}) % 128'),
        (2,
         'The last batch for training after mixing could be too few! ({0}%{1})={2}. Please change test_size.',
         'Try this formula: int(int({0}*{1})*2*{2}) % 128'),
    )
    for multiplier, warn_fmt, hint_fmt in checks:
        train_num_samples = int(x_train.shape[0] * multiplier * kept_fraction)
        last_batch = train_num_samples % batch_size
        if last_batch < min_last_batch:
            logger.warning(warn_fmt.format(train_num_samples, batch_size, last_batch))
            logger.warning(hint_fmt.format(x.shape[0], 1.0-test_size, kept_fraction))
    return x_train, x_test, y_train, y_test, dxy_train, dxy_test

In [4]:
# Load the displaced-muon sample and split it into train/test parts.
(x_train_displ, x_test_displ,
 y_train_displ, y_test_displ,
 dxy_train_displ, dxy_test_displ) = muon_data_split(
    infile_muon_displ,
    reg_pt_scale=reg_pt_scale,
    reg_dxy_scale=reg_dxy_scale,
    test_size=0.315,
)
# Only the magnitude of the first regression target is used from here on.
y_train_displ, y_test_displ = np.abs(y_train_displ), np.abs(y_test_displ)

[INFO    ] Loading muon data from /Users/gpradhan/Desktop/optimization-nn-for-cms-muon-trigger/data/NN_input_params_FlatXYZ.npz ...
[INFO    ] Loaded the variables with shape (19300000, 25)
[INFO    ] Loaded the parameters with shape (19300000, 6)
[INFO    ] Loaded the encoded variables with shape (3284620, 23)
[INFO    ] Loaded the encoded parameters with shape (3284620,)
[INFO    ] Loaded # of training and testing events: (2249964, 1034656)


### Part-III: Use the cells in this section for data exploration

In [None]:
# cols = ["dphi_1","dphi_2","dphi_3","dphi_4","dphi_5","dphi_6",
#        "dtheta_1","dtheta_2","dtheta_3","dtheta_4","dtheta_5", "dtheta_6",
#        "bend_1","bend_2","bend_3","bend_4",
#        "track theta"]

# x = np.concatenate((x_train_displ,x_test_displ),axis=0)
# y = np.concatenate((y_train_displ,y_test_displ),axis=0)
# dxy = np.concatenate((dxy_train_displ,dxy_test_displ),axis=0)

# corr_plot(x,y,dxy,columns = cols)

### Part-III: Load the Baseline Model and Evaluate Its Performance

In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import (LearningRateScheduler, 
                                        TerminateOnNaN, 
                                        EarlyStopping)

# project-specific imports
from nn_evaluate import huber_loss, k_fold_validation
from nn_training import lr_schedule
from nn_pruning_module_support import loading_trained_model
from nn_training_pruned_model import (generate_layer_masks, 
                                      create_sparse_model,
                                      train_sparse_model)
from nn_pruning_module_support import __generate_delta_plots__

In [None]:
# Load the trained baseline model stored under ./models/ and show its layers.
baseline = loading_trained_model(
    filepath="./models/",
    model_filename="model",
)
baseline.summary()

In [None]:
# Quantitative Evaluation: same call for each metric, on the held-out test set.
for _metric in ("MAE", "RMSE"):
    k_fold_validation(
        model=baseline,
        x=x_test_displ,
        y=y_test_displ,
        dxy=dxy_test_displ,
        folds=1,
        metric_type=_metric,
    )

# Qualitative Evaluation:
__generate_delta_plots__(
    model=baseline,
    x=x_test_displ,
    y=y_test_displ,
    dxy=dxy_test_displ,
    color="salmon",
)

### Part-IV: Build the custom model

In [None]:
def run_iterative_pruning_v1(baseline_model = None,
                          target_sparsity: float = 0.1, 
                          pruning_fraction_step:float = 0.1,
                          training_params: list = None):
    """
    Iteratively prune and retrain a model, one sparsity level per cycle.

    Uses a new train-test-val split for each pruning cycle: the data file is
    reloaded and re-split with ``muon_data_split`` at the start of every cycle.

    Parameters
    ----------
    baseline_model
        Model handed to ``create_sparse_model`` in the first cycle. Every
        later cycle starts from the model produced by the previous cycle.
    target_sparsity : float
        Last sparsity level to reach; must satisfy 0 < target_sparsity <= 1.
    pruning_fraction_step : float
        Sparsity of the first cycle and increment between cycles; must be
        positive and not larger than ``target_sparsity``.
    training_params : list of dict
        One dict per cycle with the keys 'lr', 'clipnorm', 'eps', 'momentum',
        'epochs', 'batch_size', 'l1_reg' and 'l2_reg'.

    Returns
    -------
    (pruned_models, training_history)
        Two lists with one entry per completed cycle.

    Raises
    ------
    ValueError
        If a sparsity argument is out of range or ``training_params`` has
        fewer entries than there are cycles to run.
    """

    if target_sparsity > 1.0 or target_sparsity <= 0:
        raise ValueError("INVALID value entered for target sparsity, it can only be in the range [0,1]")
    if pruning_fraction_step <= 0:
        # A non-positive step would never reach the target.
        raise ValueError("INVALID value entered for pruning fraction, it has to be > 0")
    if pruning_fraction_step > target_sparsity:
        raise ValueError("INVALID value entered for pruning fraction, it has to be <= target_sparsity")

    training_params = [] if training_params is None else training_params

    # Derive the number of cycles up front instead of accumulating floats:
    # repeated "+= step" drifts (0.1+0.1+0.1 > 0.3) and could silently drop
    # the last cycle. The small epsilon absorbs the division round-off.
    n_cycles = int(target_sparsity / pruning_fraction_step + 1e-9)
    if len(training_params) < n_cycles:
        # Fail before any expensive training rather than midway through.
        raise ValueError("training_params needs one entry per pruning cycle "
                         "({0} required, {1} given)".format(n_cycles, len(training_params)))

    # list of models, new pruned models get appended to it while training
    pruned_models, training_history = [],[]
    plot_colors = ["red","orange","blue","cyan","purple","green","magenta", "salmon"]
    banner = "-----------------------------------------------------------------------------------------------"

    for i in range(n_cycles):
        # Rounded so that the level is e.g. 0.3 rather than 0.30000000000000004.
        sparsity = round(pruning_fraction_step * (i + 1), 10)

        print(banner)
        print(banner)
        print("Currently pruning the model upto {} % of the baseline".format(round(sparsity*100)))
        print(banner)
        print(banner)

        # Fresh split for this cycle (local names; the notebook globals are untouched).
        x_train_displ, x_test_displ, y_train_displ, y_test_displ, dxy_train_displ, dxy_test_displ = muon_data_split(
            infile_muon_displ,
            reg_pt_scale=reg_pt_scale,
            reg_dxy_scale=reg_dxy_scale,
            test_size=0.315)
        y_train_displ = np.abs(y_train_displ)
        y_test_displ = np.abs(y_test_displ)

        # training loop begins
        params = training_params[i]

        # define optimizer, callbacks here
        # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
        adam = Adam(learning_rate=params['lr'], clipnorm=params['clipnorm'])
        lr_decay = LearningRateScheduler(lr_schedule, verbose=1)
        terminate_on_nan = TerminateOnNaN()
        early_stopping = EarlyStopping(monitor='val_loss', min_delta=1e-6, patience=18, 
                                       verbose=True,
                                       mode='min')

        # First cycle prunes the baseline, later cycles prune the previous result.
        curr_model = pruned_models[-1] if pruned_models else baseline_model

        pruned_model = create_sparse_model(model = curr_model,
                                           input_dim = nvariables,
                                           output_dim = 2,
                                           k_sparsity = sparsity,
                                           bn_epsilon = params['eps'],
                                           bn_momentum = params['momentum'],
                                           l1_reg = params['l1_reg'],
                                           # previously l1_reg was passed here by mistake
                                           l2_reg = params['l2_reg'],
                                           kernel_initializer="glorot_uniform",
                                           optimizer = adam)

        pruned_model, history = train_sparse_model(sparse_model = pruned_model,
                                                   x = x_train_displ,
                                                   y = y_train_displ,
                                                   dxy = dxy_train_displ,
                                                   retrain_epochs = params['epochs'],
                                                   batch_size = params['batch_size'],
                                                   callbacks=[lr_decay, 
                                                              early_stopping, 
                                                              terminate_on_nan],
                                                   verbose = True,
                                                   validation_split=0.1)

        # evaluate the model
        for metric_type in ("MAE", "RMSE"):
            k_fold_validation(model = pruned_model, 
                              x = x_test_displ, 
                              y = y_test_displ, 
                              dxy = dxy_test_displ, 
                              folds = 1,
                              metric_type = metric_type)

        __generate_delta_plots__(model = pruned_model,
                                 x = x_test_displ,
                                 y = y_test_displ,
                                 dxy = dxy_test_displ,
                                 # cycle through the palette so long runs cannot index past it
                                 color = plot_colors[i % len(plot_colors)])

        pruned_models.append(pruned_model)
        training_history.append(history)
        # training ends

    return pruned_models, training_history

In [None]:
def run_iterative_pruning_v2(X_train, y_train, dxy_train,
                            X_test, y_test, dxy_test,
                            baseline_model = None,
                            init_sparsity: float = 0.1,
                            target_sparsity: float = 1.0, 
                            pruning_fraction_step:float = 0.1,
                            training_params: list = None,
                            cv_folds: int = 50):
    """
    Iteratively prune and retrain a model, one sparsity level per cycle.

    Uses the same train-test-val-split for each pruning cycle: the arrays
    passed in are reused unchanged in every cycle.

    Parameters
    ----------
    X_train, y_train, dxy_train
        Training inputs and targets handed to ``train_sparse_model``.
    X_test, y_test, dxy_test
        Held-out data handed to ``k_fold_validation`` and the delta plots.
    baseline_model
        Model handed to ``create_sparse_model`` in the first cycle. Every
        later cycle starts from the model produced by the previous cycle.
    init_sparsity : float
        Sparsity level of the first cycle.
    target_sparsity : float
        Last sparsity level to reach; must satisfy 0 < target_sparsity <= 1.
    pruning_fraction_step : float
        Increment between cycles; must be positive and not larger than
        ``target_sparsity``.
    training_params : list of dict
        One dict per cycle with the keys 'lr', 'clipnorm', 'eps', 'momentum',
        'epochs', 'batch_size', 'l1_reg' and 'l2_reg'.
    cv_folds : int
        Passed to ``k_fold_validation`` as ``folds``.

    Returns
    -------
    (pruned_models, training_history)
        Two lists with one entry per completed cycle. Both are empty when
        ``init_sparsity`` already exceeds ``target_sparsity``.

    Raises
    ------
    ValueError
        If a sparsity argument is out of range or ``training_params`` has
        fewer entries than there are cycles to run.
    """

    if target_sparsity > 1.0 or target_sparsity <= 0:
        raise ValueError("INVALID value entered for target sparsity, it can only be in the range [0,1]")
    if pruning_fraction_step <= 0:
        # A non-positive step would never reach the target.
        raise ValueError("INVALID value entered for pruning fraction, it has to be > 0")
    if pruning_fraction_step > target_sparsity:
        raise ValueError("INVALID value entered for pruning fraction, it has to be <= target_sparsity")

    training_params = [] if training_params is None else training_params

    # Derive the number of cycles up front instead of accumulating floats:
    # repeated "+= step" drifts (0.1+0.1+0.1 > 0.3) and could silently drop
    # the last cycle. The small epsilon absorbs the division round-off.
    if init_sparsity > target_sparsity:
        n_cycles = 0
    else:
        n_cycles = int((target_sparsity - init_sparsity) / pruning_fraction_step + 1e-9) + 1
    if len(training_params) < n_cycles:
        # Fail before any expensive training rather than midway through.
        raise ValueError("training_params needs one entry per pruning cycle "
                         "({0} required, {1} given)".format(n_cycles, len(training_params)))

    # list of models, new pruned models get appended to it while training
    pruned_models, training_history = [],[]
    plot_colors = ["red","orange","blue","cyan","purple","navy","teal","salmon"]
    banner = "-----------------------------------------------------------------------------------------------"

    for i in range(n_cycles):
        # Rounded so that the level is e.g. 0.3 rather than 0.30000000000000004.
        sparsity = round(init_sparsity + pruning_fraction_step * i, 10)

        print(banner)
        print(banner)
        print("Currently pruning the model upto {} % of the baseline".format(round(sparsity*100)))
        print(banner)
        print(banner)

        # training loop begins
        params = training_params[i]

        # define optimizer, callbacks here
        # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
        adam = Adam(learning_rate=params['lr'], clipnorm=params['clipnorm'])
        lr_decay = LearningRateScheduler(lr_schedule, verbose=1)
        terminate_on_nan = TerminateOnNaN()
        early_stopping = EarlyStopping(monitor='val_loss', min_delta=1e-6, patience=40, 
                                       verbose=True,
                                       mode='min')

        # First cycle prunes the baseline, later cycles prune the previous result.
        curr_model = pruned_models[-1] if pruned_models else baseline_model

        pruned_model = create_sparse_model(model = curr_model,
                                           input_dim = nvariables,
                                           output_dim = 2,
                                           k_sparsity = sparsity,
                                           bn_epsilon = params['eps'],
                                           bn_momentum = params['momentum'],
                                           l1_reg = params['l1_reg'],
                                           # previously l1_reg was passed here by mistake
                                           l2_reg = params['l2_reg'],
                                           kernel_initializer="glorot_uniform",
                                           optimizer = adam)

        pruned_model, history = train_sparse_model(sparse_model = pruned_model,
                                                   x = X_train,
                                                   y = y_train,
                                                   dxy = dxy_train,
                                                   retrain_epochs = params['epochs'],
                                                   batch_size = params['batch_size'],
                                                   callbacks=[lr_decay, 
                                                              early_stopping, 
                                                              terminate_on_nan],
                                                   verbose = True,
                                                   validation_split=0.1)

        # evaluate the model
        for metric_type in ("MAE", "RMSE"):
            k_fold_validation(model = pruned_model, 
                              x = X_test, 
                              y = y_test, 
                              dxy = dxy_test, 
                              folds = cv_folds,
                              metric_type = metric_type)

        __generate_delta_plots__(model = pruned_model,
                                 x = X_test,
                                 y = y_test,
                                 dxy = dxy_test,
                                 # cycle through the palette so long runs cannot index past it
                                 color = plot_colors[i % len(plot_colors)])

        pruned_models.append(pruned_model)
        training_history.append(history)
        # training ends

    return pruned_models, training_history

In [None]:
# Prune from 10% to 80% sparsity in 10% steps on the fixed split created above.
# Alternative that re-splits the data on every cycle:
#   run_iterative_pruning_v1(baseline_model=baseline,
#                            target_sparsity=0.7,
#                            training_params=ft_params)
pruned_models, training_history = run_iterative_pruning_v2(
    x_train_displ, y_train_displ, dxy_train_displ,
    x_test_displ, y_test_displ, dxy_test_displ,
    baseline_model=baseline,
    init_sparsity=0.1,
    target_sparsity=0.8,
    pruning_fraction_step=0.1,
    training_params=ft_params,
    cv_folds=1,
)


### Part- V: Saving and Loading the Trained Model

In [None]:
import pickle
# `sys` is used by the save/load cells below (sys.path[-1]); import it
# explicitly rather than relying on it leaking in from a star import.
import sys
from nn_pruning_module_support import (saving_pruned_model,loading_pruned_model)
# fixed typo: this line read "fron", which is a SyntaxError
from nn_evaluate import get_sparsity
from custom_dense_layer import MaskedDense
from keras.regularizers import l1_l2
from keras.initializers import glorot_uniform

In [None]:
# Persist every pruned model together with the history dict of its training run.
# File names carry the suffix 10, 20, ... derived from the list position.
# NOTE(review): the suffix equals the sparsity percentage only when pruning
# starts at 0.1 and advances in steps of 0.1 - confirm before changing the schedule.
for _idx, _pruned in enumerate(pruned_models):
    _tag = str((_idx + 1) * 10)
    with open("./trainingLog" + _tag, 'wb') as file_pi:
        pickle.dump(training_history[_idx].history, file_pi)
    saving_pruned_model(
        model=_pruned,
        filepath=sys.path[-1] + "/models",
        model_filename="custom_model_" + _tag,
    )

In [None]:
# Test loading the pruned_model
# Round-trip check: reload the model saved as "custom_model_70".
# The custom layer, initializer and regularizer are supplied so that the
# loader can resolve those names.
_custom_objects = {
    'GlorotUniform': glorot_uniform(),
    "MaskedDense": MaskedDense,
    "L1L2": l1_l2(),
}
pruned_70 = loading_pruned_model(
    filepath=sys.path[-1] + "/models",
    model_filename="custom_model_70",
    custom_objects=_custom_objects,
)

In [None]:
# Print the sparsity of the first weight array of every layer whose name
# contains "dense" (presumably the kernel matrix - confirm against MaskedDense).
for layer in pruned_70.layers:
    if "dense" not in layer.name:
        continue
    first_weights = layer.get_weights()[0]
    print(get_sparsity(first_weights))