In [1]:
import json
import pandas as pd
import h5py
import numpy as np
import tensorflow as tf
import keras
from keras.layers import Input, Dense, Dropout, LSTM
from keras.models import Model
from keras.optimizers import Adam, SGD
from sklearn.model_selection import train_test_split
import os
import h5py
import matplotlib.pyplot as plt

In [2]:
# Read the HDF5 store 'data_500k' (path relative to the working directory)
# into a DataFrame; every later cell slices its inputs and targets from it.
df = pd.read_hdf(path_or_buf='data_500k')

In [3]:
# Model inputs: the first 16 columns of the frame, taken as a NumPy array.
features = df.values[:, :16]

In [4]:
# Training targets: columns 16, 17 and 18 of the frame, taken as a NumPy array
# (three columns, matching the 3-unit output layer of the model built below).
labels = df.values[:, 16:19]

In [5]:
# Bookkeeping values: number of rows in the frame and number of input features.
length, featuresnum = df.shape[0], 16

In [6]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)


In [7]:
# Add a trailing unit axis so every training sample has shape (16, 1),
# which is the input shape declared for the model.
images = X_train.reshape((X_train.shape[0], 16, 1))

In [8]:
# Network: a single LSTM layer over the (16, 1) input, then three ReLU dense
# layers (dropout after the first two) and a 3-unit softmax output.
Inputs = Input(shape=(16, 1))

x = LSTM(
    units=16,
    dropout=0.5,
    kernel_initializer='lecun_uniform',
    name='lstm1',
)(Inputs)

x = Dense(
    units=512,
    activation='relu',
    kernel_initializer='lecun_uniform',
    name='fc1_relu',
)(x)
x = Dropout(rate=0.5)(x)

x = Dense(
    units=256,
    activation='relu',
    kernel_initializer='lecun_uniform',
    name='fc2_relu',
)(x)
x = Dropout(rate=0.5)(x)

x = Dense(
    units=128,
    activation='relu',
    kernel_initializer='lecun_uniform',
    name='fc3_relu',
)(x)

# NOTE(review): this layer is named 'output_sigmoid' but its activation is
# softmax. The name is left unchanged because anything looking the layer up
# by name would depend on it.
predictions = Dense(
    units=3,
    activation='softmax',
    kernel_initializer='lecun_uniform',
    name='output_sigmoid',
)(x)

model = Model(inputs=Inputs, outputs=predictions)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 16, 1)]           0         
_________________________________________________________________
lstm1 (LSTM)                 (None, 16)                1152      
_________________________________________________________________
fc1_relu (Dense)             (None, 512)               8704      
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
fc2_relu (Dense)             (None, 256)               131328    
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
fc3_relu (Dense)             (None, 128)               32896 

In [9]:
# SGD with momentum. The step size is passed as `learning_rate`: the older
# `lr` alias is deprecated in TF 2.x and is rejected by newer Keras releases.
sgd = tf.keras.optimizers.SGD(learning_rate=0.002, momentum=0.9)

# Categorical cross-entropy pairs with the 3-unit softmax output; accuracy is
# tracked as an additional metric during training.
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Train on in-memory arrays, using the last 25% of the training data for
# validation. `use_multiprocessing` / `workers` are intentionally not passed:
# they only affect generator / Sequence inputs (so they did nothing for NumPy
# arrays) and newer Keras releases no longer accept them in `fit`.
# `callbacks=None` was the default and is omitted as well.
history = model.fit(
    images,
    y_train,
    batch_size=1024,
    epochs=200,
    validation_split=0.25,
    shuffle=True,
)

Epoch 1/200
 96/833 [==>...........................] - ETA: 1:35 - loss: 1.0972 - accuracy: 0.3587

KeyboardInterrupt: 

In [None]:
# Give the held-out samples the same (16, 1) per-sample shape as the
# training inputs so they can be fed to the model.
imagetest = X_test.reshape((X_test.shape[0], 16, 1))

In [11]:
def makeRoc(features_val, labels_val, labels, model, outputDir='', outputSuffix=''):
    """Draw one ROC curve per class on a single figure and return the predictions.

    Args:
        features_val: Inputs handed to ``model.predict``.
        labels_val: 2-D array of true labels; column ``i`` is scored against
            column ``i`` of the predictions.
        labels: Iterable of class names. Each name is used as a column key,
            concatenated with ``'_pred'`` and passed through ``str.replace``,
            so the names must be strings.
        model: Object exposing a ``predict`` method.
        outputDir: Unused; figure saving is currently disabled.
        outputSuffix: Unused; figure saving is currently disabled.

    Returns:
        Whatever ``model.predict(features_val)`` returned.
    """
    from sklearn.metrics import roc_curve, auc

    labels_pred = model.predict(features_val)

    # Truth and score columns are collected per class in a scratch frame.
    scores = pd.DataFrame()
    fpr, tpr, auc1 = {}, {}, {}

    plt.figure(figsize=(10, 8))
    for col, name in enumerate(labels):
        scores[name] = labels_val[:, col]
        pred_name = name + '_pred'
        scores[pred_name] = labels_pred[:, col]

        # The thresholds returned by roc_curve are not needed for the plot.
        fpr[name], tpr[name], _ = roc_curve(scores[name], scores[pred_name])
        auc1[name] = auc(fpr[name], tpr[name])

        legend_text = '%s tagger, AUC = %.1f%%' % (name.replace('j_', ''), auc1[name] * 100.)
        plt.plot(fpr[name], tpr[name], label=legend_text)

    # Dashed diagonal as a visual reference line.
    plt.plot([0, 1], [0, 1], lw=1, color='black', linestyle='--')

    plt.xlabel("Background Efficiency")
    plt.ylabel("Signal Efficiency")
    plt.xlim(-0.05, 1.05)
    plt.ylim(0.001, 1.05)
    plt.grid(True)
    plt.legend(loc='lower right')
    plt.figtext(0.25, 0.90, 'LSTM ROC Curve', fontweight='bold', wrap=True,
                horizontalalignment='right', fontsize=14)
    return labels_pred

In [None]:
# makeRoc needs the class *names* (it uses them as column keys and legend
# text), not the numeric target array stored in `labels`. Take the names from
# the three target columns of the frame; str() guards against non-string
# column labels.
label_names = [str(name) for name in df.columns[16:19]]
y_pred = makeRoc(imagetest, y_test, label_names, model, outputSuffix='two-layer')

In [None]:
def learningCurve(history):
    """Plot training and validation loss per epoch, show the figure, then close it.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the ``'loss'`` and ``'val_loss'`` series.
    """
    plt.figure(figsize=(10, 8))

    # Training curve first, validation second; the legend order relies on this.
    for series in ('loss', 'val_loss'):
        plt.plot(history.history[series], linewidth=1)

    plt.title('Model Loss over Epochs')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['training sample loss', 'validation sample loss'])

    plt.show()
    plt.close()

In [None]:
# Plot the loss curves recorded by model.fit; `history` only exists once the
# training cell has run to completion.
learningCurve(history=history)