<a href="https://colab.research.google.com/github/arizzi/NNTutorial/blob/master/Notebook3_Architecture_Examples_WithSolutions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Notebook 3: architecture examples
In this notebook, we will explore a few kinds of network layers
- Conv2D
- Concatenated DNN+Conv2D
- Conv1D
- GRU

You will then combine these ingredients into a new model of your choice and train it to check its performance.

In [0]:
import os
if os.path.isfile('jetImage_Merged.h5') :
    print ("File already downloaded")
else:
    !wget http://cern.ch/arizzi/jetImage_Merged.h5
    !mkdir models

File already downloaded


In [0]:
# keras imports
from keras.models import Model
from keras.layers import Dense, Input, Conv2D, Dropout, Flatten, GRU
from keras.layers import Concatenate, Reshape, BatchNormalization, Activation
from keras.layers import MaxPooling2D, MaxPooling3D
from keras.utils import plot_model
from keras import regularizers
from keras import backend as K
from keras import metrics
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, TerminateOnNaN
from keras.regularizers import l1

In [0]:
%matplotlib inline

In [0]:
import h5py
import glob
import numpy as np
import matplotlib.pyplot as plt

# Dataset preparation

In [0]:
# Open the HDF5 file read-only and copy the three datasets into memory inside
# the `with` block, so the file handle is released as soon as loading is done.
with h5py.File("jetImage_Merged.h5", "r") as f:
    jets = np.array(f.get('jets'))
    Ximage = np.array(f.get('jetImage'))
    Xlist = np.array(f.get('jetConstituentList'))

# Columns [-6:-1] of the 'jets' table become the 5 target columns and every
# column before them an input feature; the very last column is not used.
y = np.array(jets[:,-6:-1])
X = np.array(jets[:,:-6])

In [0]:
# Display the shapes of the three input arrays (first axis = number of jets in each)
X.shape, Ximage.shape, Xlist.shape

((98001, 53), (98001, 25, 25), (98001, 188, 16))

---

Split the dataset as follows:
- 2/3 for training
- 1/3 for validation 

This time we do it by hand, after shuffling the dataset (just in case)

---

In [0]:
# Fixed seed: the shuffle, and therefore the train/validation split, is the
# same on every run of the notebook.
split_seed = 42
# Index of the first validation row: the first 2/3 of the rows are for training.
nSplit = int(2./3.*X.shape[0])
permutation = np.random.RandomState(split_seed).permutation(X.shape[0])
# Apply the same permutation to every array so that rows stay aligned.
X = X[permutation]
y = y[permutation]
Ximage = Ximage[permutation]
Xlist = Xlist[permutation]
# Rows before nSplit -> training set, rows from nSplit on -> validation set.
X_train = X[:nSplit, :]
X_test = X[nSplit:, :]
y_train = y[:nSplit, :]
y_test = y[nSplit:, :]
Ximage_train = Ximage[:nSplit, :, :]
Ximage_test = Ximage[nSplit:, :, :]
Xlist_train = Xlist[:nSplit, :, :]
Xlist_test = Xlist[nSplit:, :, :]
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, Ximage_train.shape, Ximage_test.shape, Xlist_train.shape, Xlist_test.shape)

(65334, 53) (32667, 53) (65334, 5) (32667, 5) (65334, 25, 25) (32667, 25, 25) (65334, 188, 16) (32667, 188, 16)


# Build a Conv2D model

Some keras magic: add an extra trailing axis to the image arrays, representing the channel. <br>
For instance:
- for images in RGB format one would have 3 channels
- for ECAL+HCAL one could foresee two channels

In [0]:
# Append a channel axis of size 1 after the first three axes
# (the Conv2D layers below are configured as channels_last).
Ximage_train = Ximage_train.reshape(Ximage_train.shape[:3] + (1,))
Ximage_test = Ximage_test.reshape(Ximage_test.shape[:3] + (1,))

In [0]:
# Training hyper-parameters, read by the model-building and fit cells below
batch_size = 128
n_epochs = 500  # upper bound passed to fit(); the EarlyStopping callback can end training sooner
dropoutRate = 0.25  # rate passed to every Dropout layer
# Image height and width, read from the reshaped training array
img_rows = Ximage_train.shape[1]
img_cols = Ximage_train.shape[2]

In [0]:
# Conv2D classifier: three [Conv2D -> BatchNorm -> ReLU -> Dropout] stages,
# then a small dense head with a 5-way softmax output.
image_shape = (img_rows,img_cols,1)

# The input shape is declared once, on the Input layer. The Conv2D layers are
# called on tensors, so they take no input_shape argument of their own.
inputImage = Input(shape=image_shape)
x = BatchNormalization()(inputImage)

# (number of filters, kernel size, strides) of each convolutional stage
conv_stages = [
    (5, (3,3), (3,3)),
    (3, (5,5), (2,2)),
    (2, (7,7), (2,2)),
]
for n_filters, kernel, stride in conv_stages:
    x = Conv2D(n_filters, kernel_size=kernel, data_format="channels_last",
               strides=stride, padding="same")(x)
    x = BatchNormalization()(x)
    x = Activation("relu")(x)
    x = Dropout(dropoutRate)(x)

# Flatten the feature maps into a vector for the dense head
x = Flatten()(x)
x = Dense(10, activation='relu')(x)
x = Dropout(dropoutRate)(x)

# One softmax output per class
output = Dense(5, activation='softmax')(x)
model = Model(inputs=inputImage, outputs=output)

In [0]:
# Configure training (Adam optimizer, categorical cross-entropy loss),
# then print the layer-by-layer summary.
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 25, 25, 1)         0         
_________________________________________________________________
batch_normalization_5 (Batch (None, 25, 25, 1)         4         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 9, 9, 5)           50        
_________________________________________________________________
batch_normalization_6 (Batch (None, 9, 9, 5)           20        
_________________________________________________________________
activation_4 (Activation)    (None, 9, 9, 5)           0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 9, 9, 5)           0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 5, 5, 3)           378       
__________

# Train the model

In [0]:
# Callbacks, all driven by the validation loss:
#  - stop after 10 epochs without improvement
#  - multiply the learning rate by 0.1 after 2 epochs without improvement
#  - abort immediately if the loss becomes NaN
training_callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1),
    TerminateOnNaN(),
]
# Fit on the training images, validating on the held-out third
history = model.fit(
    Ximage_train, y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    verbose=2,
    validation_data=(Ximage_test, y_test),
    callbacks=training_callbacks,
)

Train on 65334 samples, validate on 32667 samples
Epoch 1/500
 - 18s - loss: 1.5487 - val_loss: 1.4080
Epoch 2/500
 - 17s - loss: 1.4499 - val_loss: 1.3626
Epoch 3/500
 - 17s - loss: 1.4028 - val_loss: 1.3173
Epoch 4/500
 - 17s - loss: 1.3695 - val_loss: 1.2585
Epoch 5/500
 - 17s - loss: 1.3493 - val_loss: 1.2576
Epoch 6/500
 - 17s - loss: 1.3261 - val_loss: 1.2172
Epoch 7/500
 - 17s - loss: 1.3079 - val_loss: 1.2295
Epoch 8/500
 - 17s - loss: 1.2981 - val_loss: 1.5120

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 9/500
 - 17s - loss: 1.2856 - val_loss: 1.2109
Epoch 10/500
 - 17s - loss: 1.2790 - val_loss: 1.1942
Epoch 11/500
 - 17s - loss: 1.2746 - val_loss: 1.1863
Epoch 12/500
 - 17s - loss: 1.2721 - val_loss: 1.1741
Epoch 13/500
 - 17s - loss: 1.2771 - val_loss: 1.1830
Epoch 14/500
 - 17s - loss: 1.2724 - val_loss: 1.1910

Epoch 00014: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 15/500
 - 17s - loss: 1.2720 - va

In [0]:
# Training and validation loss per epoch, on a logarithmic y axis
for loss_key in ('loss', 'val_loss'):
    plt.plot(history.history[loss_key])
plt.yscale('log')
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

# Store model into files

In [0]:
# Tag inserted into the file names used to save and reload the model (models/jetTagger_<name>.*)
name = 'Conv2D_Small'

In [0]:
# Persist the network as two files: the architecture as JSON, the weights as HDF5.
json_path = "models/jetTagger_%s.json" %name
weights_path = "models/jetTagger_%s.h5" %name

model_json = model.to_json()
with open(json_path, "w") as json_file:
    json_file.write(model_json)

model.save_weights(weights_path)

# Read model

In [0]:
from keras.models import model_from_json

# Rebuild the architecture from its JSON description. The `with` block closes
# the file even if reading it raises.
with open("models/jetTagger_%s.json" %name, 'r') as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)

# Load the stored weights into the freshly built model
model.load_weights("models/jetTagger_%s.h5" %name)
print("Loaded model from disk")

# Check Performances

In [0]:
# Class names; entry i is paired with column i of y_test and of the network output below.
# NOTE(review): assumes this order matches the label columns stored in the dataset - confirm.
labels = ['j_g', 'j_q', 'j_w', 'j_z', 'j_t']

In [0]:
import pandas as pd
from sklearn.metrics import roc_curve, auc

# Per-class scores predicted for the validation images
predict_test = model.predict(Ximage_test)

df = pd.DataFrame()
fpr, tpr, auc1 = {}, {}, {}

plt.figure()
for idx, label in enumerate(labels):
    pred_column = label + '_pred'
    # Truth column and predicted score for this class
    df[label] = y_test[:,idx]
    df[pred_column] = predict_test[:,idx]

    # ROC curve of this class against all the others, and the area under it
    fpr[label], tpr[label], threshold = roc_curve(df[label], df[pred_column])
    auc1[label] = auc(fpr[label], tpr[label])

    # Signal efficiency (TPR) on x, background mistag rate (FPR) on y
    legend_entry = '%s tagger, auc = %.1f%%'%(label,auc1[label]*100.)
    plt.plot(tpr[label], fpr[label], label=legend_entry)

plt.semilogy()
plt.xlabel("sig. efficiency")
plt.ylabel("bkg. mistag rate")
plt.ylim(0.0001,1)
plt.grid(True)
plt.legend(loc='lower right')
plt.show()

In [0]:
import itertools
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it on the current matplotlib figure.

    Parameters
    ----------
    cm : 2-D numpy array
        Confusion matrix; rows are shown as true labels, columns as
        predicted labels.
    classes : sequence of str
        Tick labels, one per class, used on both axes.
    normalize : bool
        If True, every row is divided by its sum before printing/plotting.
        Rows whose sum is zero are left as zeros instead of producing NaN.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap used to draw the matrix.

    Returns
    -------
    None. The caller is responsible for creating and showing the figure.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Divide only where the row total is non-zero; empty rows stay at 0.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Write each cell's value on top of it, in white on dark cells and in
    # black on light ones so that it stays readable.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Called after the axis labels are set so that they are included in the layout.
    plt.tight_layout()

In [0]:
from sklearn.metrics import confusion_matrix

# Class index = position of the largest entry in each row
true_classes = np.argmax(y_test, axis=1)
predicted_classes = np.argmax(predict_test, axis=1)
cnf_matrix = confusion_matrix(true_classes, predicted_classes)
np.set_printoptions(precision=2)

# Draw the matrix twice, each on its own figure: raw counts, then row-normalized
matrix_variants = (
    (False, 'Confusion matrix, without normalization'),
    (True, 'Normalized confusion matrix'),
)
for normalize_rows, plot_title in matrix_variants:
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=labels, normalize=normalize_rows,
                          title=plot_title)

plt.show()

# Exercise:
Try to combine a convolutional and a dense NN (to make it short, train for less than 10 epochs)<br> Keep in mind that two 1-D layers can be concatenated doing

x = Concatenate()([x1,x2])

and that it is possible to use arrays of input layers when defining the network 

model = Model(inputs=[input1, input2], outputs=output)

and two datasets when training

model.fit([x1_train, x2_train], y_train, epochs=n_epochs, batch_size=batch_size, verbose = 2,
                validation_data=([x1_test, x2_test], y_test))

In [0]:
# Make sure the image arrays carry a channel axis of size 1 after their first
# three axes (leaves the shape unchanged if that axis is already there).
Ximage_train = Ximage_train.reshape(Ximage_train.shape[:3] + (1,))
Ximage_test = Ximage_test.reshape(Ximage_test.shape[:3] + (1,))

In [0]:
# Training hyper-parameters for the combined Conv2D + dense model
batch_size = 128
n_epochs = 10  # maximum number of epochs passed to fit()
dropoutRate = 0.25  # rate passed to every Dropout layer
# Image height and width, read from the training array
img_rows = Ximage_train.shape[1]
img_cols = Ximage_train.shape[2]

In [0]:
# Two-input classifier: a convolutional branch on the jet images and a dense
# branch on the per-jet feature vector, concatenated before the output head.
image_shape = (img_rows,img_cols,1)

# --- Convolutional branch -------------------------------------------------
# The input shape is declared once, on the Input layer. The Conv2D layers are
# called on tensors, so they take no input_shape argument of their own.
inputImage = Input(shape=image_shape)
xI = BatchNormalization()(inputImage)

# (number of filters, kernel size, strides) of each convolutional stage
conv_stages = [
    (5, (5,5), (1,1)),
    (3, (3,3), (1,1)),
    (1, (2,2), (3,3)),
]
for n_filters, kernel, stride in conv_stages:
    xI = Conv2D(n_filters, kernel_size=kernel, data_format="channels_last",
                strides=stride, padding="same")(xI)
    xI = BatchNormalization()(xI)
    xI = Activation("relu")(xI)
    xI = Dropout(dropoutRate)(xI)

# Flatten to a 1-D vector so it can be concatenated with the dense branch
xI = Flatten()(xI)

# --- Dense branch ---------------------------------------------------------
# Input width is taken from the feature array instead of being hard-coded
inputLayer = Input(shape=(X_train.shape[1],))
xD = BatchNormalization()(inputLayer)
xD = Dense(10, activation='relu')(xD)
xD = Dropout(dropoutRate)(xD)

# --- Merge and classify ---------------------------------------------------
x = Concatenate()([xI,xD])
x = Dense(5, activation='relu')(x)
x = Dropout(dropoutRate)(x)

# One softmax output per class
output = Dense(5, activation='softmax')(x)
model = Model(inputs=[inputImage,inputLayer], outputs=output)

In [0]:
# Adam optimizer with categorical cross-entropy loss; then list the layers of both branches.
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.summary()

In [0]:
# The model has two inputs, so both the training and the validation data are
# passed as [images, features] lists, in the order used in Model(inputs=...).
train_inputs = [Ximage_train, X_train]
validation_inputs = [Ximage_test, X_test]

# Early stopping, learning-rate reduction and NaN guard
training_callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1),
    TerminateOnNaN(),
]
history = model.fit(
    train_inputs, y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    verbose=2,
    validation_data=(validation_inputs, y_test),
    callbacks=training_callbacks,
)

In [0]:
# Loss curves of the combined model: training first, validation second
loss_curves = [history.history['loss'], history.history['val_loss']]
for curve in loss_curves:
    plt.plot(curve)
plt.title('model loss')
plt.yscale('log')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [0]:
# Save the combined model: architecture to a JSON file, weights to an HDF5 file
name = 'Conv2D_DNN_Small'
json_path = "models/jetTagger_%s.json" %name
weights_path = "models/jetTagger_%s.h5" %name

model_json = model.to_json()
with open(json_path, "w") as json_file:
    json_file.write(model_json)

model.save_weights(weights_path)

In [0]:
# Class names; entry i is paired with column i of y_test and of the network output below.
# NOTE(review): assumes this order matches the label columns stored in the dataset - confirm.
labels = ['j_g', 'j_q', 'j_w', 'j_z', 'j_t']

In [0]:
import pandas as pd
from sklearn.metrics import roc_curve, auc

# Per-class scores from the two-input model on the validation set
predict_test = model.predict([Ximage_test, X_test])

df = pd.DataFrame()
fpr, tpr, auc1 = {}, {}, {}

plt.figure()
for idx, label in enumerate(labels):
    pred_column = label + '_pred'
    # Truth column and predicted score for this class
    df[label] = y_test[:,idx]
    df[pred_column] = predict_test[:,idx]

    # ROC curve of this class against all the others, and the area under it
    fpr[label], tpr[label], threshold = roc_curve(df[label], df[pred_column])
    auc1[label] = auc(fpr[label], tpr[label])

    # Signal efficiency (TPR) on x, background mistag rate (FPR) on y
    legend_entry = '%s tagger, auc = %.1f%%'%(label,auc1[label]*100.)
    plt.plot(tpr[label], fpr[label], label=legend_entry)

plt.semilogy()
plt.xlabel("sig. efficiency")
plt.ylabel("bkg. mistag rate")
plt.ylim(0.0001,1)
plt.grid(True)
plt.legend(loc='lower right')
plt.show()

In [0]:
from sklearn.metrics import confusion_matrix

# Class index = position of the largest entry in each row
true_classes = np.argmax(y_test, axis=1)
predicted_classes = np.argmax(predict_test, axis=1)
cnf_matrix = confusion_matrix(true_classes, predicted_classes)
np.set_printoptions(precision=2)

# Raw counts on the first figure
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=labels, normalize=False,
                      title='Confusion matrix, without normalization')

# Row-normalized values on the second figure
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=labels, normalize=True,
                      title='Normalized confusion matrix')

plt.show()

# Exercise:
Do the same using the list-of-particles dataset, with a GRU taking as input the first 30 of the 188 particles in the list (to keep it short, train for fewer than 10 epochs)<br> 
Keep in mind that the syntax for a GRU layer is

x = GRU(20, activation='selu', recurrent_activation='hard_sigmoid')(InputLayer)

In [0]:
# Keep only the first 30 entries along axis 1 (the particle list); the last axis is left untouched
Xlist_train_short = Xlist_train[:, :30]
Xlist_test_short = Xlist_test[:, :30]

In [0]:
# Recurrent classifier on the truncated particle list:
# GRU -> Dense(relu) -> Dropout -> 5-way softmax.
InputLayer = Input(shape=(30,16))

# The GRU consumes the 30-step sequence and returns a 20-unit output
gru_layer = GRU(20, activation='selu', recurrent_activation='hard_sigmoid',
                name='gru_selu')
x = gru_layer(InputLayer)

# Dense head
dense_layer = Dense(20, activation='relu', kernel_initializer='lecun_uniform',
                    name='dense_relu')
x = dense_layer(x)
x = Dropout(dropoutRate)(x)

# One softmax output per class
softmax_layer = Dense(5, activation='softmax', kernel_initializer='lecun_uniform',
                      name='output_softmax')
outputLayer = softmax_layer(x)

model = Model(inputs=InputLayer, outputs=outputLayer)

In [0]:
# Same training configuration as the previous models: Adam + categorical cross-entropy
model.compile(optimizer='adam', loss='categorical_crossentropy')
model.summary()

In [0]:
# Maximum number of epochs for the GRU model; early stopping may end training sooner
n_epochs = 100

# Early stopping, learning-rate reduction and NaN guard, all monitoring val_loss
training_callbacks = [
    EarlyStopping(monitor='val_loss', patience=10, verbose=1),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1),
    TerminateOnNaN(),
]
# Fit on the truncated particle lists
history = model.fit(
    Xlist_train_short, y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    verbose=2,
    validation_data=(Xlist_test_short, y_test),
    callbacks=training_callbacks,
)

In [0]:
# Loss history of the GRU model, training curve first and validation curve second
training_loss = history.history['loss']
validation_loss = history.history['val_loss']
plt.plot(training_loss)
plt.plot(validation_loss)
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.yscale('log')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [0]:
# Save the GRU model: architecture to a JSON file, weights to an HDF5 file
name = 'GRU_Small'
json_path = "models/jetTagger_%s.json" %name
weights_path = "models/jetTagger_%s.h5" %name

model_json = model.to_json()
with open(json_path, "w") as json_file:
    json_file.write(model_json)

model.save_weights(weights_path)

In [0]:
# Class names; entry i is paired with column i of y_test and of the network output below.
# NOTE(review): assumes this order matches the label columns stored in the dataset - confirm.
labels = ['j_g', 'j_q', 'j_w', 'j_z', 'j_t']

In [0]:
import pandas as pd
from sklearn.metrics import roc_curve, auc

# Per-class scores from the GRU model on the truncated validation lists
predict_test = model.predict(Xlist_test_short)

df = pd.DataFrame()
fpr, tpr, auc1 = {}, {}, {}

plt.figure()
for idx, label in enumerate(labels):
    pred_column = label + '_pred'
    # Truth column and predicted score for this class
    df[label] = y_test[:,idx]
    df[pred_column] = predict_test[:,idx]

    # ROC curve of this class against all the others, and the area under it
    fpr[label], tpr[label], threshold = roc_curve(df[label], df[pred_column])
    auc1[label] = auc(fpr[label], tpr[label])

    # Signal efficiency (TPR) on x, background mistag rate (FPR) on y
    legend_entry = '%s tagger, auc = %.1f%%'%(label,auc1[label]*100.)
    plt.plot(tpr[label], fpr[label], label=legend_entry)

plt.semilogy()
plt.xlabel("sig. efficiency")
plt.ylabel("bkg. mistag rate")
plt.ylim(0.0001,1)
plt.grid(True)
plt.legend(loc='lower right')
plt.show()

In [0]:
from sklearn.metrics import confusion_matrix

# Class index = position of the largest entry in each row
true_classes = np.argmax(y_test, axis=1)
predicted_classes = np.argmax(predict_test, axis=1)
cnf_matrix = confusion_matrix(true_classes, predicted_classes)
np.set_printoptions(precision=2)

# One figure per variant: raw counts first, row-normalized second
for normalize_rows, plot_title in [(False, 'Confusion matrix, without normalization'),
                                   (True, 'Normalized confusion matrix')]:
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=labels, normalize=normalize_rows,
                          title=plot_title)

plt.show()