`uproot` can read ROOT objects from a ROOT file without relying on the ROOT I/O library.

In [1]:
import uproot
import json
from io import StringIO

def get_net_struct(obj_path):
    '''
    Read a JSON-encoded TObjString from a ROOT file and return it parsed.

    obj_path -- location of the object, written as
                file.root:TDirectory/subdirectory/.../obj

    Returns the Python object produced by decoding the stored string as JSON.
    Raises TypeError if the object found at obj_path is not a TObjString.
    '''
    # Open the object named by the argument, not a module-level variable,
    # so the function can be reused for any network configuration.
    with uproot.open(obj_path) as net_config:
        if not isinstance(net_config, uproot.models.TObjString.Model_TObjString):
            raise TypeError(
                f"{obj_path!r} is a {type(net_config).__name__}, expected a TObjString"
            )
        # convert string to dictionary
        _struct = json.loads(net_config)
    return _struct

def print_dimention(weights):
    '''
    Print the "weights" entry of every layer except the first one.

    weights -- sequence of layer dictionaries; each entry after index 0 is
               looked up with the key "weights".
    '''
    remaining_layers = weights[1:]
    for layer in remaining_layers:
        layer_weights = layer["weights"]
        print(layer_weights)

def all_layers(weights):
    '''
    Print each layer's position followed by the keys it contains.

    weights -- iterable of layer dictionaries.
    '''
    for position, layer in enumerate(weights):
        layer_keys = layer.keys()
        print(position, layer_keys)

#uproot.open("BTagCalibRUN2-08-40.root").keys()

DL1 networks of interest:
* 'DL1',
* 'DL1/AntiKt4EMTopo',
* 'DL1/AntiKt4EMTopo/net_configuration',
* 'DL1mu',
* 'DL1mu/AntiKt4EMTopo',
* 'DL1mu/AntiKt4EMTopo/net_configuration',
* 'DL1rnn',
* 'DL1rnn/AntiKt4EMTopo',
* 'DL1rnn/AntiKt4EMTopo/net_configuration',


## DL1rnn 
DL1rnn is a neural network trained by the b-tagging group. 
The model weights are stored in the `BTagCalibRUN2-08-40.root` file as a string object. Our goal is to read the weight strings and convert them into JSON format.

In [2]:
# Object location, written as filename.root:TDirectory/subdirectory/obj
File_path="BTagCalibRUN2-08-40.root:DL1/AntiKt4EMTopo/net_configuration"

# Parse the stored configuration and keep its 'layers' entry,
# which the cells below index layer by layer.
DL1_struct = get_net_struct(File_path)
DL1_weights = DL1_struct['layers']

# Report how many layer entries were stored and which keys each one has.
print(f"number of layers: {len(DL1_weights)}")
all_layers(DL1_weights)
#print_dimention(weights)


number of layers: 17
0 dict_keys(['sublayers', 'activation', 'architecture'])
1 dict_keys(['bias', 'weights', 'architecture'])
2 dict_keys(['bias', 'weights', 'activation', 'architecture'])
3 dict_keys(['bias', 'weights', 'architecture'])
4 dict_keys(['bias', 'weights', 'activation', 'architecture'])
5 dict_keys(['bias', 'weights', 'architecture'])
6 dict_keys(['bias', 'weights', 'activation', 'architecture'])
7 dict_keys(['bias', 'weights', 'architecture'])
8 dict_keys(['bias', 'weights', 'activation', 'architecture'])
9 dict_keys(['bias', 'weights', 'architecture'])
10 dict_keys(['sublayers', 'activation', 'architecture'])
11 dict_keys(['bias', 'weights', 'architecture'])
12 dict_keys(['bias', 'weights', 'activation', 'architecture'])
13 dict_keys(['bias', 'weights', 'architecture'])
14 dict_keys(['bias', 'weights', 'activation', 'architecture'])
15 dict_keys(['bias', 'weights', 'architecture'])
16 dict_keys(['bias', 'weights', 'activation', 'architecture'])


The `sublayers` entries store the weights and biases of the MaxoutDense layers. A total of two MaxoutDense layers are stored.  
The other layers are BatchNormalization and Dense layers.  

In [3]:
# Show the top-level keys of the parsed network configuration.
DL1_struct.keys()

dict_keys(['layers', 'outputs', 'defaults', 'inputs'])

In [4]:
import numpy as np

#DL1_weights[0]['sublayers'][maxout_unit]['bias']

def get_maxout_weights(NN_layer):
    '''
    Stack the weights and biases of every sublayer of a maxout layer.

    NN_layer -- dictionary with a 'sublayers' list; every sublayer holds a
                flat 'weights' list and a 'bias' list. At least one sublayer
                is required, and all sublayers are reshaped with the
                dimensions derived from the first one.

    Returns a tuple (weights, biases):
      weights -- the per-sublayer (in_features, units) matrices joined
                 along axis 1
      biases  -- all sublayer biases flattened into one 1-D array

    NOTE(review): the flat weight list is reshaped row-major as
    (in_features, units); confirm this matches the layout used by whatever
    wrote the configuration.
    '''
    sublayers = NN_layer['sublayers']

    # Dimensions are read from the first sublayer and reused for the rest.
    units = len(sublayers[0]['bias'])
    in_features = len(sublayers[0]['weights']) // units

    # Walk over however many sublayers are stored, not a fixed count.
    maxout_weights = [
        np.array(sub['weights']).reshape(in_features, units)
        for sub in sublayers
    ]
    maxout_biases = [np.array(sub['bias']) for sub in sublayers]

    return (np.concatenate(maxout_weights, axis=1),
            np.array(maxout_biases).flatten())

def get_dense_weights(NN_layer):
    '''
    Turn a stored dense layer into a (kernel, bias) pair of arrays.

    NN_layer -- dictionary with a flat 'weights' list and a 'bias' list.

    The number of units is the bias length; the number of input features is
    the weight count divided by it. Returns (kernel, bias), where kernel has
    shape (in_features, units).

    NOTE(review): the flat list is reshaped row-major straight into
    (in_features, units); confirm the stored ordering is not the transpose.
    '''
    n_units = len(NN_layer["bias"])
    flat_weights = NN_layer['weights']
    n_inputs = len(flat_weights) // n_units
    kernel = np.reshape(np.array(flat_weights), (n_inputs, n_units))
    bias = np.array(NN_layer["bias"])
    return (kernel, bias)

def get_BN_weights(NN_layer):
    '''
    Build the four arrays handed to a BatchNormalization layer.

    NN_layer -- dictionary with 'weights' and 'bias' lists.

    Returns a 4-tuple: the stored weights, the stored bias, an array of
    zeros and an array of ones, the last two as long as the bias.

    NOTE(review): presumably consumed as (gamma, beta, moving_mean,
    moving_variance); confirm that the stored weights/bias really map onto
    gamma/beta for the format these values were exported in.
    '''
    n_units = len(NN_layer["bias"])
    scale = np.array(NN_layer['weights'])
    offset = np.array(NN_layer["bias"])
    zeros = np.array([0] * n_units)
    ones = np.array([1] * n_units)
    return (scale, offset, zeros, ones)
             
# Layer entry 0 carries a 'sublayers' key, so it goes through the maxout extractor.
maxout_w, maxout_b = get_maxout_weights(DL1_weights[0])


In [5]:
#len(DL1_weights[10]['sublayers'][maxout_unit]['weights'])//24
# Inspect the (kernel, bias) pair extracted from layer entry 16.
get_dense_weights(DL1_weights[16])


(array([[-1.35040843e+00, -1.66020200e-01, -1.97673023e+00],
        [-5.76982737e-01,  1.77271590e-01,  5.48534654e-02],
        [ 6.44354999e-01, -3.26688260e-01,  4.08351779e-01],
        [ 2.89725870e-01, -2.53850769e-04, -1.28739476e-01],
        [ 1.25599802e+00, -9.43711817e-01,  1.12691796e+00],
        [-8.74396861e-02, -6.20727062e-01, -8.17855239e-01]]),
 array([-0.58612728,  0.66026765, -0.19757801]))

Below, I define DL1rnn with the TensorFlow Keras API. Instead of training a new network, I set the weights of each layer to the weights extracted above. 

In [6]:
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_addons as tfa
#from keras.layers.core import MaxoutDense

from models.maxout_layers import Maxout1D
    
# Output widths passed to get_DL1 as h_layers, one per block of the network.
DL1_layers = [ 72, 57, 60, 48, 36,24, 12, 6]
# Dropout rates passed to get_DL1 as drops, indexed in step with DL1_layers.
DL1_dropouts = [0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2]
# When False, get_DL1 receives drops=None and adds no Dropout layers.
dropout_enable = True

# DL1rnn definition
#maxoutdense1 = MaxoutDense1D(72, 25)
#maxoutdense6 = MaxoutDense1D(24, 25)
def get_DL1(N_features, h_layers, lr=0.01, drops=None):
    '''
    Build and compile the DL1 Keras model.

    N_features -- number of input features
    h_layers   -- output width of each block; entries 0 and 5 become
                  Maxout1D blocks, every other entry a Dense block
    lr         -- learning rate given to the Adam optimizer
    drops      -- optional dropout rates indexed like h_layers; when falsy,
                  no Dropout layers are added

    Each block is followed by BatchNormalization; Dense blocks also get a
    ReLU activation. The output is a 3-way softmax Dense layer. Returns the
    model compiled with categorical cross-entropy and the accuracy metric.

    The order in which layers are added must not change: the weight-transfer
    code addresses layers by their position in the model.
    '''
    # Second argument of Maxout1D; presumably the number of maxout pieces.
    n_maxout = 25
    # Position in h_layers (besides 0) that is built as a Maxout1D block.
    second_maxout_pos = 5

    In = tf.keras.layers.Input(shape=(N_features,))
    x = Maxout1D(h_layers[0], n_maxout)(In)
    x = keras.layers.BatchNormalization()(x)
    if drops:
        # training=True is fixed at call time on every Dropout layer.
        x = keras.layers.Dropout(drops[0])(x, training=True)

    for pos, units in enumerate(h_layers[1:], start=1):
        if pos == second_maxout_pos:
            x = Maxout1D(units, n_maxout)(x)
            x = keras.layers.BatchNormalization()(x)
        else:
            x = keras.layers.Dense(units, activation="linear",
                                   kernel_initializer='glorot_uniform')(x)
            x = keras.layers.BatchNormalization()(x)
            x = keras.layers.Activation("relu")(x)

        if drops:
            x = keras.layers.Dropout(drops[pos])(x, training=True)

    predictions = keras.layers.Dense(3, activation='softmax',
                                     kernel_initializer='glorot_uniform')(x)

    model = keras.models.Model(inputs=In, outputs=predictions)

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    model_optimizer = keras.optimizers.Adam(learning_rate=lr)
    model.compile(
        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
        optimizer=model_optimizer,
        metrics=['accuracy']
    )
    return model


In [7]:

# Build the network for 41 input features; dropout rates are passed only when dropout_enable is set.
DL1_model = get_DL1(41,DL1_layers, drops=DL1_dropouts if dropout_enable else None )
DL1_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 41)]              0         
_________________________________________________________________
maxout1d (Maxout1D)          (None, 72)                75600     
_________________________________________________________________
batch_normalization (BatchNo (None, 72)                288       
_________________________________________________________________
dropout (Dropout)            (None, 72)                0         
_________________________________________________________________
dense (Dense)                (None, 57)                4161      
_________________________________________________________________
batch_normalization_1 (Batch (None, 57)                228       
_________________________________________________________________
activation (Activation)      (None, 57)                0     

In [8]:
#index=0 is input layer

# Running index into DL1_weights; it is advanced by 2 before each use.
k = -2

# Positions of the Keras layers that receive weights, keyed by dropout_enable.
layer_index = {
    True: [1, 4, 8, 12, 16, 20, 23, 27, 31],   # 1,20 are maxout
    False: [1, 3, 6, 9, 12, 15, 17, 20, 23],   # 1,15 are maxout
}
maxout_index = {
    True: [1, 20],
    False: [1, 15],
}

for i in layer_index[dropout_enable]:
    k += 2
    print(i, k)

    is_maxout = i in maxout_index[dropout_enable]
    is_final = i == layer_index[dropout_enable][-1]

    if is_maxout:
        maxout_L = DL1_model.get_layer(index=i)
        maxout_L.set_weights(get_maxout_weights(DL1_weights[k]))
    else:
        Dense_L = DL1_model.get_layer(index=i)
        Dense_L.set_weights(get_dense_weights(DL1_weights[k]))

    # Every layer except the final Dense one is followed by a
    # BatchNormalization layer, filled from the next stored entry.
    if is_maxout or not is_final:
        BN_L = DL1_model.get_layer(index=i + 1)
        BN_L.set_weights(get_BN_weights(DL1_weights[k + 1]))



1 0
4 2
8 4
12 6
16 8
20 10
23 12
27 14
31 16


In [9]:
#test model with dummy inputs
# NOTE(review): get_DL1 calls its Dropout layers with training=True, so they
# presumably stay active here despite training=False -- confirm this is intended.
DL1_model(inputs=np.random.random((5, 41)), training=False)

<tf.Tensor: shape=(5, 3), dtype=float32, numpy=
array([[0.1679932 , 0.58424383, 0.24776304],
       [0.18530111, 0.57879996, 0.23589899],
       [0.1679932 , 0.58424383, 0.24776304],
       [0.1679932 , 0.58424383, 0.24776304],
       [0.1679932 , 0.58424383, 0.24776304]], dtype=float32)>

In [10]:
# Choose the output name according to whether the Dropout layers were built.
model_file = "DL1_AntiKt4EMTopo_dropout" if dropout_enable else "DL1_AntiKt4EMTopo"
# The name has no file extension.
DL1_model.save(model_file)

INFO:tensorflow:Assets written to: DL1_AntiKt4EMTopo_dropout/assets


## Save this model

This model contains a custom layer, so it cannot be saved as a single `.h5` file with `save("model.h5")`: the custom layer implemented in the model is not known to the loader, and you will get an error when loading the model again.   
Alternatively, `save("DL1_AntiKt4EMTopo")` saves the model into a directory that contains the model architecture and weights.

## Load model

The `load_model()` function, `tf.keras.models.load_model("DL1_AntiKt4EMTopo")`, can directly load the model architecture and weights, including the custom layer.

In [11]:
# Reload the model from the name it was saved under and print its layer table.
test_model = tf.keras.models.load_model(model_file)
test_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 41)]              0         
_________________________________________________________________
maxout1d (Maxout1D)          (None, 72)                75600     
_________________________________________________________________
batch_normalization (BatchNo (None, 72)                288       
_________________________________________________________________
dropout (Dropout)            (None, 72)                0         
_________________________________________________________________
dense (Dense)                (None, 57)                4161      
_________________________________________________________________
batch_normalization_1 (Batch (None, 57)                228       
_________________________________________________________________
activation (Activation)      (None, 57)                0     

In [12]:
# Run the reloaded model on 5 random samples with 41 features each.
test_model(inputs=np.random.random((5, 41)), training=False)

<tf.Tensor: shape=(5, 3), dtype=float32, numpy=
array([[9.9961275e-01, 3.8171333e-04, 5.5667260e-06],
       [7.6519459e-01, 2.0181756e-01, 3.2987844e-02],
       [7.6483935e-01, 2.0210320e-01, 3.3057477e-02],
       [3.0023044e-01, 5.2644479e-01, 1.7332473e-01],
       [1.6799320e-01, 5.8424383e-01, 2.4776304e-01]], dtype=float32)>