# Point reconstruction in NEXT-NEW

In this notebook we read in the prepared data, construct and train the DNN, and then evaluate its performance for reconstruction of point-like events in NEXT-NEW.

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy  as np
import random as rd
import tables as tb
import h5py
import tensorflow as tf

from matplotlib.patches         import Ellipse
from __future__  import print_function
from scipy.stats import threshold

# Keras imports
import keras.backend.tensorflow_backend as K
from keras.models               import Model, load_model, Sequential
from keras.layers               import Input, Dense, MaxPooling3D, AveragePooling3D, Convolution3D, Activation, Dropout, merge
from keras.layers.normalization import BatchNormalization
from keras.optimizers           import SGD, Adam, Nadam         
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.core          import Flatten
from keras                      import callbacks
from keras.regularizers         import l2

Using TensorFlow backend.


# Variable definitions
Here we define key variables to be used throughout the notebook.  Note that all events are read from a single HDF5 file, `data_fname`, and are split by event index:
- The training (and validation) data will consist of the first `ntrain` events stored in the file
- The test data will consist of all remaining events, i.e. those from index `ntrain` onwards

In [None]:
# dimensions of one SiPM map (x, y, z)
xdim, ydim, zdim = 48, 48, 1

# HDF5 file holding the SiPM maps and the true coordinates of every event
data_fname = "/Users/jrenner/IFIC/jerenner/next-deeplearning-analysis/data/dnn_kr_100k.h5"

# label scaling used for net training: y* = y/fscale + fshift
# (y is the true coordinate, y* the value the net is trained on)
fshift = 0.5
fscale = 400.

# other parameters
sipm_th = 0.01  # minimum charge for a SiPM to be included in the barycenter
ntrain = 99000  # number of events used for training and validation

## Function definitions

### Data input functions

In [None]:
# define the function to read the data from a single file and split it into training and test samples
def read_data(dat_file, ntrain):
    """Read SiPM maps and labels from `dat_file` and split them by event index.

    Parameters
    ----------
    dat_file : str
        Path of the file to open with PyTables; its `maps` and `coords`
        nodes under the root group are read.
    ntrain : int
        Number of leading events assigned to the training sample; all
        remaining events form the test sample.

    Returns
    -------
    x_, y_, x_test, y_test
        Maps reshaped to (n_events, 48, 48, 1) and the first two label
        columns transformed as y/fscale + fshift (module-level constants),
        for the training and test samples respectively.
    """
    # the context manager closes the file even if reading or reshaping raises
    with tb.open_file(dat_file, 'r') as indata:
        sum_maps = np.reshape(indata.root.maps, (len(indata.root.maps), 48, 48))
        labels = np.array(indata.root.coords, dtype=np.float32)

    # add the trailing channel dimension expected by the network
    maps_4d = np.reshape(sum_maps, (len(sum_maps), 48, 48, 1))

    # keep only the first two label columns and apply the training scale/shift
    scaled_xy = labels[:, :2]/fscale + fshift

    x_, x_test = maps_4d[0:ntrain], maps_4d[ntrain:]
    y_, y_test = scaled_xy[0:ntrain], scaled_xy[ntrain:]

    print("Finished reading data: {0} training and {1} test events".format(len(x_),len(x_test)))
    return x_,y_,x_test,y_test

### Neural network models
These functions should define and return a Keras model object.

In [None]:
# define a fully-connected neural network with four hidden layers (1024, 512, 256, 128 neurons)
# and 2 readout neurons, one per predicted coordinate
def model_FC(inputs):
    """Build and compile the fully-connected regression network.

    Parameters
    ----------
    inputs
        Unused; kept so the call signature stays the same. The input shape is
        fixed to (48, 48, 1) by the Flatten layer below.

    Returns
    -------
    A compiled Keras `Sequential` model with a 2-unit output layer, trained
    with mean-squared-error loss and the Nadam optimizer.
    """
    model = Sequential()
    model.add(Flatten(input_shape=(48,48,1)))

    # hidden layers of decreasing width
    for n_units in (1024, 512, 256, 128):
        model.add(Dense(units=n_units, activation='relu'))

    # readout layer: one neuron per coordinate
    # NOTE(review): a ReLU readout cannot produce negative values; this is
    # compatible with labels in [0,1] but confirm it is intended.
    model.add(Dense(units=2,    activation='relu'))

    # 'accuracy' is a classification metric and carries no meaning for a
    # continuous regression target, so the mean absolute error is reported instead
    model.compile(loss='mse',
                  optimizer=Nadam(lr=0.001, beta_1=0.9, beta_2=0.999,
                                  epsilon=1e-08, schedule_decay=0.01),
                  metrics=['mae'])
    return model

In [None]:
# plot a 48x48 SiPM map
def NEW_SiPM_map_plot(xarr, yarr, normalize=True, zoom=False):
    """Draw a 48x48 SiPM map with the true event position overlaid.

    Parameters
    ----------
    xarr : 2D array indexed as [i, j]
        SiPM values; SiPM (i, j) is drawn at (i*10 - 235, j*10 - 235).
    yarr : sequence of 2 floats
        Scaled label (x*, y*); converted back with fscale*(y* - fshift)
        using the module-level constants.
    normalize : bool
        If True, shift and scale the map so its values span [0, 1].
    zoom : bool
        If True, restrict the view to +/-50 around the SiPM with the largest value.
    """
    if normalize:
        # work on a float copy so integer maps can be divided and the input is untouched
        probs = np.asarray(xarr, dtype=float) - np.min(xarr)
        peak = np.max(probs)
        if peak > 0:  # a completely flat map would otherwise divide by zero
            probs = probs / peak
    else:
        probs = np.asarray(xarr)

    # set up the figure
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    fig.set_figheight(5.0)
    fig.set_figwidth(5.0)
    ax1.axis([-250, 250, -250, 250])

    # draw the SiPMs with the appropriate shading
    for i in range(48):
        for j in range(48):
            r = Ellipse(xy=(i * 10 - 235, j * 10 - 235), width=2., height=2.)
            r.set_facecolor('0')
            # the 0.01 offset keeps empty SiPMs faintly visible; clamp because
            # Matplotlib rejects alpha values outside [0, 1]
            r.set_alpha(min(1.0, max(0.0, float(probs[i, j]) + 0.01)))
            ax1.add_artist(r)

    # place a large blue circle for actual EL point
    xpt = fscale*(yarr[0] - fshift)
    ypt = fscale*(yarr[1] - fshift)
    mrk = Ellipse(xy=(xpt,ypt), width=4., height=4.)
    mrk.set_facecolor('b')
    ax1.add_artist(mrk)

    # zoom the plot around the most energetic SiPM if enabled
    if zoom:
        # argmax indexes the flattened map: row = index // 48, column = index % 48
        imax, jmax = divmod(int(np.argmax(probs)), 48)
        xmax = imax * 10 - 235
        ymax = jmax * 10 - 235
        ax1.axis([xmax-50,xmax+50,ymax-50,ymax+50])

    ax1.set_xlabel("x (mm)")
    ax1.set_ylabel("y (mm)")

In [None]:
# returns a prediction of the (x,y) location based on a charge-weighted average of the SiPM maps
def predict_barycenter(x_evts, threshold=None):
    """Charge-weighted mean position of the SiPMs above threshold, per event.

    Parameters
    ----------
    x_evts : array-like of shape (n_events, 48, 48, 1)
        SiPM maps; SiPM (i, j) sits at (i*10 - 235, j*10 - 235).
    threshold : float, optional
        Only SiPMs with charge strictly greater than this value contribute.
        Defaults to the module-level `sipm_th`.

    Returns
    -------
    numpy.ndarray of shape (n_events, 2)
        Barycenter (x, y) of each event. Events in which no SiPM exceeds the
        threshold yield (nan, nan) instead of raising a division-by-zero error.
    """
    if threshold is None:
        threshold = sipm_th

    maps = np.asarray(x_evts, dtype=np.float64)
    if maps.size == 0:
        return np.empty((0, 2))

    # drop the channel axis and zero every SiPM at or below threshold
    charge = maps[..., 0]
    charge = np.where(charge > threshold, charge, 0.0)

    # SiPM positions along one axis (same grid in x and y)
    pos = np.arange(48) * 10.0 - 235.0

    qsum = charge.sum(axis=(1, 2))
    xsum = (charge.sum(axis=2) * pos).sum(axis=1)  # first map index -> x
    ysum = (charge.sum(axis=1) * pos).sum(axis=1)  # second map index -> y

    with np.errstate(divide='ignore', invalid='ignore'):
        y_pred = np.stack([xsum, ysum], axis=1) / qsum[:, None]

    # events without any charge above threshold have no defined barycenter
    y_pred[qsum == 0] = np.nan
    return y_pred

## Load in the data

In [None]:
# load the maps and scaled labels, already split into training and test samples
(x_train, y_train,
 x_test, y_test) = read_data(dat_file=data_fname, ntrain=ntrain)

In [None]:
# display one training event together with its true point:
# first the full plane, then a zoomed view
plt_evt = 11
for zoomed in (False, True):
    NEW_SiPM_map_plot(x_train[plt_evt,:,:,0], y_train[plt_evt], normalize=True, zoom=zoomed)

## Define and train the DNN

In [None]:
# set load_mdl to True and specify the file to load in a previously defined/trained model
load_mdl = True
mfile = 'models/pt_classifier.h5'

# define callbacks (actions to be taken after each epoch of training)
# These are created in both cases: the training cell always passes `lcallbacks`
# to model.fit, so defining them only for a freshly built model left the name
# undefined whenever a saved model was loaded.
file_lbl = "{epoch:02d}-{loss:.4f}"
filepath="weights-{0}.h5".format(file_lbl)
checkpoint = callbacks.ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=True, mode='min')
tboard = callbacks.TensorBoard(log_dir='./logs', write_graph=True, write_images=False)
lcallbacks = [checkpoint, tboard]

if(load_mdl):
    model = load_model(mfile)
else:

    # otherwise define the model
    # per-event shape is (xdim, ydim, zdim); the batch axis is implicit
    inputs = Input(shape=(xdim, ydim, zdim))
    model = model_FC(inputs)
    model.summary()

In [None]:
# fit the network on the training sample; the last 5% of it is held out for validation
hist = model.fit(
    x_train,
    y_train,
    batch_size=100,
    epochs=60,
    shuffle=True,
    validation_split=0.05,
    callbacks=lcallbacks,
    verbose=1,
)

## Test the trained model

In [None]:
# evaluate both reconstruction methods on the test sample:
#  - y_pred: network output, in the scaled label units used for training
#  - y_bc:   charge-weighted barycenter, in detector coordinates
y_pred = model.predict(x_test, verbose=0, batch_size=100)
y_bc = predict_barycenter(x_test)

# optional per-event printout (uncomment to enable)
#for yt,yp,yb in zip(y_test,y_pred,y_bc):
#    print("true = ({0},{1}); pred = ({2},{3}); barycenter = ({4},{5})".format(fscale*(yt[0]-fshift),fscale*(yt[1]-fshift),fscale*(yp[0]-fshift),fscale*(yp[1]-fshift),yb[0],yb[1]))

In [None]:
# plot (error in coordinate) vs. coordinate

def _binned_mean_error(coord, abs_err, counts, nbins=50):
    """Average `abs_err` in `nbins` equal-width bins of `coord`.

    `counts` must be the unweighted histogram of `coord` with the same `nbins`.
    Returns (mean error per bin, bin edges, bin width, bin centers). Empty bins
    are set to NaN rather than triggering a 0/0 warning.
    """
    err_sum, edges = np.histogram(coord, weights=abs_err, bins=nbins)
    mean_err = np.full(len(err_sum), np.nan)
    np.divide(err_sum, counts, out=mean_err, where=(counts > 0))
    width = edges[1] - edges[0]
    centers = edges[:-1] + width/2.
    return mean_err, edges, width, centers


def _plot_error_panel(ax, centers_pred, hist_pred, centers_bc, hist_bc, xlim, xlabel):
    """Draw the NN and barycenter average-error curves on one set of axes."""
    # NaN entries (empty bins) are ignored when choosing the upper y limit
    ymax = max(np.nanmax(hist_pred), np.nanmax(hist_bc))
    ax.axis([xlim[0], xlim[1], 0, ymax])
    ax.plot(centers_pred, hist_pred, label='NN')
    ax.plot(centers_bc, hist_bc, label='barycenter')
    ax.legend(loc=2)
    ax.set_xlabel(xlabel)
    ax.set_ylabel("avg. error (mm)")


# undo the label scaling for the truth and the network prediction;
# the barycenter is not scaled and is used as is
xtrue = fscale*(y_test[:,0] - fshift); ytrue = fscale*(y_test[:,1] - fshift)
xpred = fscale*(y_pred[:,0] - fshift); ypred = fscale*(y_pred[:,1] - fshift)
xbc = y_bc[:,0]; ybc = y_bc[:,1]

# absolute per-event errors, used as histogram weights
xweights_pred = np.abs(xpred - xtrue); yweights_pred = np.abs(ypred - ytrue)
xweights_bc = np.abs(xbc - xtrue); yweights_bc = np.abs(ybc - ytrue)

# (average error in x) vs. x
xcounts, xc_edges = np.histogram(xtrue,bins=50)
xhist_pred, xbin_edges_pred, xbin_width_pred, xbin_centers_pred = _binned_mean_error(xtrue, xweights_pred, xcounts)
xhist_bc, xbin_edges_bc, xbin_width_bc, xbin_centers_bc = _binned_mean_error(xtrue, xweights_bc, xcounts)

# (average error in y) vs. y
ycounts, yc_edges = np.histogram(ytrue,bins=50)
yhist_pred, ybin_edges_pred, ybin_width_pred, ybin_centers_pred = _binned_mean_error(ytrue, yweights_pred, ycounts)
yhist_bc, ybin_edges_bc, ybin_width_bc, ybin_centers_bc = _binned_mean_error(ytrue, yweights_bc, ycounts)

# (average error) vs. r
rvals = np.sqrt(xtrue**2 + ytrue**2)
aevals_pred = np.sqrt(xweights_pred**2 + yweights_pred**2)
aevals_bc = np.sqrt(xweights_bc**2 + yweights_bc**2)

rcounts, rc_edges = np.histogram(rvals,bins=50)
rhist_pred, rbin_edges_pred, rbin_width_pred, rbin_centers_pred = _binned_mean_error(rvals, aevals_pred, rcounts)
rhist_bc, rbin_edges_bc, rbin_width_bc, rbin_centers_bc = _binned_mean_error(rvals, aevals_bc, rcounts)

# create the plot
fig = plt.figure()
fig.set_figheight(5.0)
fig.set_figwidth(15.0)

ax1 = fig.add_subplot(131)
_plot_error_panel(ax1, xbin_centers_pred, xhist_pred, xbin_centers_bc, xhist_bc, (-200, 200), "x (mm)")

ax2 = fig.add_subplot(132)
_plot_error_panel(ax2, ybin_centers_pred, yhist_pred, ybin_centers_bc, yhist_bc, (-200, 200), "y (mm)")

ax3 = fig.add_subplot(133)
_plot_error_panel(ax3, rbin_centers_pred, rhist_pred, rbin_centers_bc, rhist_bc, (0, np.max(rvals)), "r (mm)")