In [1]:
import os
import sys

# Location of the NJet BLHA python interface.
# Set the NJET_BLHA environment variable to point at a different installation;
# the fallback is the path originally hard-coded in this notebook.
NJET_BLHA = os.environ.get('NJET_BLHA', '/Users/simon/packages/njet-3.1.1-1L/blha/')
# guard so that re-running this cell does not append the same entry twice
if NJET_BLHA not in sys.path:
    sys.path.append(NJET_BLHA)

import numpy as np
from pstools.rambo import generate, dot
from njettools.njet_interface import *
from nntools.model import Model

import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
# --- run configuration ---
# n_final is the first argument to the phase-space generator and fixes the
# network input size (2+n_final)*4 used further down
n_final = 4
# forwarded as `delta` to the phase-space generator
delta_cut = 0.01
# number of training points (will later be split into NN train/test set)
n_training_points = 10000
# points for inference
n_points = 20000
# contract file handed to njet_init
contract_file = 'NJ_contract_ee{}j_tree.lh'.format(n_final)

In [3]:
# generate 2 -> n_final phase-space points for training
# (rts is presumably the centre-of-mass energy -- confirm against pstools.rambo)
momenta, n_trials = generate(n_final, n_training_points, rts=1000., delta=delta_cut)
# convert to nested Python lists, the form handed to OLP_EvalSubProcess below
momenta = momenta.tolist()

100%|██████████| 10000/10000 [00:02<00:00, 3348.06it/s]


In [4]:
# start the NJet interface and load the contract file
olp = njet.OLP()
status = njet_init(contract_file)

# report the outcome; note this only prints, it does not stop the notebook on failure
print(
    "OLP read in correctly"
    if status == True
    else "seems to be a problem with the contract file..."
)

OLP read in correctly


In [5]:
# Values forwarded as the `alpha`, `alphas` and `mur` keyword arguments of
# every OLP_EvalSubProcess call in this notebook
# (presumably EM coupling, strong coupling and renormalisation scale -- confirm with the NJet docs)
alpha = 1.0/137.0
alphas = 0.118
mur = 100.0

In [6]:
# Evaluate sub-process 1 with NJet at every training point and keep the first
# entry of the returned values (retlen=1 is requested).
NJ_treevals = [
    olp.OLP_EvalSubProcess(1, momenta[pt], alphas=alphas, alpha=alpha, mur=mur, retlen=1)[0]
    for pt in range(n_training_points)
]

In [7]:
def getFKSpartitions(mom):
    """Return the normalised partition weights for one phase-space point.

    For every pair (i, j) with 2 <= i < j < len(mom) the invariant
    s_ij = 2*dot(mom[i], mom[j]) is formed; entries 0 and 1 of `mom` are
    skipped. The weight of a pair is (1/s_ij) / sum_kl(1/s_kl), so the
    returned weights sum to one.

    Parameters
    ----------
    mom : sequence of momenta, indexable by position and accepted by `dot`.

    Returns
    -------
    numpy.ndarray
        One weight per pair, ordered with i as the outer and j as the inner index.
    """
    n_legs = len(mom)
    invariants = np.array([
        2*dot(mom[i], mom[j])
        for i in range(2, n_legs-1)
        for j in range(i+1, n_legs)
    ])
    inverse = 1/invariants
    # normalise so that the weights add up to one
    return inverse/np.sum(inverse)

In [8]:
# sanity check on the first phase-space point: the partition weights should sum to one
parts = getFKSpartitions(momenta[0])
# number of partitions; reused below to build one network per partition
n_FKSsectors = len(parts)

print(parts)
print(np.sum(parts))

[0.02567362 0.09737629 0.01347357 0.23910074 0.07462899 0.54974679]
1.0


In [9]:
# Partition weights for every training point, one row per point.
# NOTE: despite its name, momenta_FKS holds the weights returned by
# getFKSpartitions, not momenta.
momenta_FKS = np.array(
    [getFKSpartitions(momenta[pt]) for pt in range(n_training_points)]
)

# Split each NJet value across the partitions by multiplying with its weights.
NJ_treevals_FKS = np.array(
    [NJ_treevals[pt]*momenta_FKS[pt] for pt in range(n_training_points)]
)

In [10]:
# dump generated data in case NJet and interface not available
# np.save does not create missing directories, so make sure data/ exists first
if not os.path.isdir("data"):
    os.makedirs("data")

# every file shares the same stem and tail; only the middle tag differs
_dump_stem = "data/NJfks_ee"+str(n_final)+"j_tree_"
_dump_tail = "_"+str(delta_cut)+".npy"
np.save(_dump_stem+"momenta"+_dump_tail, momenta)
np.save(_dump_stem+"values"+_dump_tail, NJ_treevals)
np.save(_dump_stem+"momenta_FKS"+_dump_tail, momenta_FKS)
np.save(_dump_stem+"values_FKS"+_dump_tail, NJ_treevals_FKS)

# Start from here with pre-generated data #

In [11]:
# Reload the four arrays written by the dump cell; file names are built from
# n_final and delta_cut, so both must match the values used when dumping.
_load_stem = "data/NJfks_ee"+str(n_final)+"j_tree_"
_load_tail = "_"+str(delta_cut)+".npy"
momenta = np.load(_load_stem+"momenta"+_load_tail)
NJ_treevals = np.load(_load_stem+"values"+_load_tail)
momenta_FKS = np.load(_load_stem+"momenta_FKS"+_load_tail)
NJ_treevals_FKS = np.load(_load_stem+"values_FKS"+_load_tail)

# sizes are taken from the loaded data rather than from the config cell
n_training_points = len(momenta)
n_FKSsectors = len(momenta_FKS[0])

# First train a basic or "naive" single model to test against #

In [12]:
# Single ("naive") network trained on the full NJet values.
n_inputs = (2+n_final)*4  # train with all momenta components
NN = Model(
    n_inputs,
    momenta[:n_training_points],  # input data from Rambo PS generator
    np.array(NJ_treevals),  # data points from NJet evaluations
)

In [13]:
# fit returns the trained network plus the x/y mean and std values; these are
# passed back to process_testing_data and destandardise_data at prediction time
model, x_mean, x_std, y_mean, y_std = NN.fit(layers=[16,32,16], epoch_interval=100)

processing training data using scaling =  standardise
The training dataset has size (8000, 24)
Epoch 1/10000
Epoch 101/10000
Epoch 201/10000


# Now train each FKS partition separately #

In [None]:
# Build one network per partition: same inputs as the single network, but the
# target is column i of the partition-weighted NJet values.
NN_FKS = []
for i in range(n_FKSsectors):
    NN_FKS.append(Model(
        (2+n_final)*4, # train with all momenta components
        momenta[:n_training_points], # input data from Rambo PS generator
        np.array(NJ_treevals_FKS[:,i]) # data points from NJet evaluations
    ))

# Fit them one after another (only once all models exist); each entry keeps
# the full return value of fit for later use at prediction time.
NNfitdata_FKS = []
for i in range(n_FKSsectors):
    NNfitdata_FKS.append(NN_FKS[i].fit(layers=[16,32,16], epoch_interval=100))

processing training data using scaling =  standardise
The training dataset has size (8000, 24)
Epoch 1/10000
Epoch 101/10000
Epoch 201/10000
Epoch 301/10000
processing training data using scaling =  standardise
The training dataset has size (8000, 24)
Epoch 1/10000
Epoch 101/10000


# Test models against some new data points #

In [None]:
# fresh sample of n_points phase-space points, generated with the same rts and delta as the training set
newmomenta, n_trials2 = generate(n_final, n_points, rts=1000., delta=delta_cut)
# convert to nested Python lists, the form handed to OLP_EvalSubProcess below
newmomenta = newmomenta.tolist()

In [None]:
# Reference NJet values for the new points, evaluated exactly as for the
# training sample (sub-process 1, first returned entry).
NJ_treevals_test = [
    olp.OLP_EvalSubProcess(1, newmomenta[pt], alphas=alphas, alpha=alpha, mur=mur, retlen=1)[0]
    for pt in range(n_points)
]

In [None]:
# predict values with single network
# preprocess the new momenta with the scaling values returned by NN.fit
x_standardized = NN.process_testing_data(moms=newmomenta,
                                         x_mean=x_mean,x_std=x_std,y_mean=y_mean,y_std=y_std)
mpred = model.predict(x_standardized)
# flatten the network output to 1-D before handing it to destandardise_data
# (presumably this maps the prediction back to the scale of the NJet values -- confirm in nntools)
amp_pred = NN.destandardise_data(mpred.reshape(-1),
                                 x_mean=x_mean,x_std=x_std,y_mean=y_mean,y_std=y_std)

In [None]:
# predict values with FKS networks: one prediction per partition network
amp_pred_FKS = []

for i in range(n_FKSsectors):
    # each stored fit result unpacks as (network, x_mean, x_std, y_mean, y_std)
    model_tmp, x_m, x_s, y_m, y_s = NNfitdata_FKS[i]
    scaling = dict(x_mean=x_m, x_std=x_s, y_mean=y_m, y_std=y_s)

    x_standardized = NN_FKS[i].process_testing_data(moms=newmomenta, **scaling)
    mpred = model_tmp.predict(x_standardized)
    amp_pred_FKS.append(
        NN_FKS[i].destandardise_data(mpred.reshape(-1), **scaling)
    )

# first axis runs over the partitions (it is summed over / indexed by sector below)
amp_pred_FKS = np.array(amp_pred_FKS)

In [None]:
# add up the per-partition predictions (first axis) to get the FKS estimate
amp_pred_FKS_sum = amp_pred_FKS.sum(axis=0)

In [None]:
# first five points: NJet reference, single-network prediction, summed FKS prediction
print(NJ_treevals_test[0:5])
print(amp_pred[0:5])
print(amp_pred_FKS_sum[0:5])

In [None]:
# per-partition check for the first test point:
# NJet value times each partition weight vs. the corresponding network prediction
part = getFKSpartitions(newmomenta[0])
print(NJ_treevals_test[0]*part)
print([amp_pred_FKS[i,0] for i in range(n_FKSsectors)])

In [None]:
# signed, symmetrised relative difference (prediction - reference)/(prediction + reference)
# for the single network and for the summed FKS networks
reference_vals = np.array(NJ_treevals_test)
diff = (amp_pred - reference_vals)/(amp_pred + reference_vals)
diff_FKS = (amp_pred_FKS_sum - reference_vals)/(amp_pred_FKS_sum + reference_vals)

In [None]:
# 200 equal-width bins on [-1.5, 1.5], shared by both histograms in the next cell
# NOTE(review): the name mybins is rebound further down for the log10 plot
mybins = np.histogram_bin_edges(diff, bins=200, range=(-1.5,1.5))

In [None]:
# Overlaid histograms of the signed relative difference for the single network
# and for the summed FKS networks. The second histogram is semi-transparent so
# that it does not hide the first one.
plt.hist(diff, density=False, bins=mybins, label='single')
plt.hist(diff_FKS, density=False, bins=mybins, alpha=0.6, label='FKS')
# x carries the accuracy measure, y the number of points per bin
plt.xlabel('Accuracy')
plt.ylabel('Data')
# annotate in axes-fraction coordinates so the text stays inside the frame
# whatever the bin counts turn out to be
plt.text(0.02, 0.95, 'delta = '+str(delta_cut), transform=plt.gca().transAxes, va='top')
plt.text(0.02, 0.88, 'ee --> '+str(n_final)+'j', transform=plt.gca().transAxes, va='top')
plt.legend();

In [None]:
# log10 of the absolute relative difference for both approaches
# NOTE: a point with diff exactly 0 yields -inf here
logdiff = np.log10(np.abs(diff))
logdiff_FKS = np.log10(np.abs(diff_FKS))

In [None]:
# 200 equal-width bins on [-4, 1] for the log10 histograms in the next cell
# NOTE(review): this rebinds mybins, which the linear histogram cell above also reads;
# re-running that cell after this one would use these edges instead
mybins = np.histogram_bin_edges(logdiff, bins=200, range=(-4,1))

In [None]:
# Overlaid histograms of log10(|relative difference|) for the single network
# and for the summed FKS networks. The second histogram is semi-transparent so
# that it does not hide the first one.
plt.hist(logdiff, density=False, bins=mybins, label='single')
plt.hist(logdiff_FKS, density=False, bins=mybins, alpha=0.6, label='FKS')
# x carries the (log10) accuracy measure, y the number of points per bin
plt.xlabel('Log10 Accuracy')
plt.ylabel('Data')
# annotate in axes-fraction coordinates so the text stays inside the frame
# whatever the bin counts turn out to be
plt.text(0.02, 0.95, 'delta = '+str(delta_cut), transform=plt.gca().transAxes, va='top')
plt.text(0.02, 0.88, 'ee --> '+str(n_final)+'j', transform=plt.gca().transAxes, va='top')
plt.legend();