In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import energyflow as ef
import energyflow.archs
from energyflow.archs import PFN
from matplotlib import gridspec
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Layer, concatenate
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
from tensorflow.keras.layers import BatchNormalization

plt.rc('font', size=20)
plt.rcParams["font.family"] = "serif"

os.environ["CUDA_VISIBLE_DEVICES"] = "1" # pick a number < 4 on ML4HEP
physical_devices = tf.config.list_physical_devices('GPU') 
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [2]:
#These are the same datasets from the OmniFold paper https://arxiv.org/abs/1911.09107.  More detail at https://energyflow.network/docs/datasets/.
#Pythia and Herwig are two generators; one will be treated here as the "simulation" and one as "data".
datasets = {'Pythia26': ef.zjets_delphes.load('Pythia26', num_data=1000000),
            'Herwig': ef.zjets_delphes.load('Herwig', num_data=1000000)}



In [3]:
w_gen = datasets['Pythia26']['gen_widths'] #gen = particle level
w_sim = datasets['Pythia26']['sim_widths'] #sim = detector level
w_truth = datasets['Herwig']['gen_widths']
w_data = datasets['Herwig']['sim_widths']

p_gen = datasets['Pythia26']['gen_jets'][:,0] #gen = particle level
p_sim = datasets['Pythia26']['sim_jets'][:,0] #sim = detector level
p_truth = datasets['Herwig']['gen_jets'][:,0]
p_data = datasets['Herwig']['sim_jets'][:,0]

In [4]:
def weighted_binary_crossentropy(y_true, y_pred):
    weights = tf.gather(y_true, [1], axis=1) # event weights
    y_true = tf.gather(y_true, [0], axis=1) # actual y_true for loss

    weights_1 = K.sum(y_true*weights)
    weights_0 = K.sum((1-y_true)*weights)

    # Clip the prediction value to prevent NaN's and Inf's
    epsilon = K.epsilon()
    y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
    t_loss = -weights * ((y_true) * K.log(y_pred)/weights_1 +
                         (1 - y_true) * K.log(1 - y_pred)/weights_0)
    return K.mean(t_loss)

def weighted_binary_crossentropy_GAN(y_true, y_pred):
    weights = tf.gather(y_pred, [1], axis=1) # event weights
    y_pred = tf.gather(y_pred, [0], axis=1) # actual y_pred for loss

    weights_1 = K.sum(y_true*weights)
    weights_0 = K.sum((1-y_true)*weights)

    #tf.print("weights",weights_0,weights_1)

    # Clip the prediction value to prevent NaN's and Inf's
    epsilon = K.epsilon()
    y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
    t_loss = weights * ((1 - y_true) * K.log(1 - y_pred)/weights_0)
    return K.mean(t_loss)

In [10]:
xvals_1 = np.concatenate([w_truth,w_gen])
pvals_1 = np.concatenate([p_truth, p_gen])
yvals_1 = np.concatenate([np.ones(len(w_truth)),np.zeros(len(w_gen))])

xvals_2 = np.concatenate([w_data,w_sim])
pvals_2 = np.concatenate([p_data, p_sim])
yvals_2 = np.concatenate([np.ones(len(w_data)),np.zeros(len(w_sim))])

X_train_1, X_test_1, Y_train_1, Y_test_1, X_train_2, X_test_2, Y_train_2, Y_test_2 , P_train_1, P_test_1, P_train_2, P_train_1 = train_test_split(xvals_1, yvals_1, xvals_2, yvals_2, pvals_1, pvals_2)


inputs_gen = Input(shape=(2,))
hidden_layer_1_gen = Dense(50, activation='relu')(mymodel_inputtest)
hidden_layer_2_gen = Dense(50, activation='relu')(hidden_layer_1_gen)
hidden_layer_3_gen = Dense(50, activation='relu')(hidden_layer_2_gen)
outputs_gen = tf.exp(Dense(1, activation='linear')(hidden_layer_3_gen))
model_generator = Model(inputs=inputs_gen, outputs=outputs_gen)



inputs_disc = Input((2, ))
hidden_layer_1_disc = Dense(50, activation='relu')(inputs_disc)
hidden_layer_2_disc = Dense(50, activation='relu')(hidden_layer_1_disc)
hidden_layer_3_disc = Dense(50, activation='relu')(hidden_layer_2_disc)
outputs_disc = Dense(1, activation='sigmoid')(hidden_layer_3_disc)
model_discrimantor = Model(inputs=inputs_disc, outputs=outputs_disc)

n_epochs = 20
n_batch = 128*10
n_batches = len(X_train_1) // n_batch

for i in range(n_epochs):
    lambdasum = np.log(model_generator.predict([1.]))
    lambdasum2 = np.log(model_generator.predict([2.]))
    mylambda1 = (lambdasum2-2*lambdasum)/2
    mylambda0 = lambdasum - mylambda1
    print("on epoch=",i,mylambda0,mylambda1)
    #print("  ",np.sum(model_generator.predict(X_train_1,batch_size=1000)))
    for j in range(n_batches):
        X_batch = X_train_1[j*n_batch:(j+1)*n_batch]
        Y_batch = Y_train_1[j*n_batch:(j+1)*n_batch]
        P_batch = P_train_1[j*n_batch:(j+1)*n_batch]
        W_batch = model_generator(X_batch, P_batch)
        W_batch = np.array(W_batch).flatten()
        W_batch[Y_batch==1] = 1
        #W_batch[Y_batch==0] = 1
        
        Y_batch_2 = np.stack((Y_batch, W_batch), axis=1)
        
        model_discrimantor.train_on_batch(X_batch, Y_batch_2)
        
        #print("      ",j,np.sum(model_generator.predict(X_batch,batch_size=1000)),np.log(model_generator.predict([1.]))-np.log(model_generator.predict([0.])),np.log(model_generator.predict([0.])))
        
        gan_model.train_on_batch(X_batch[Y_batch==0],np.zeros(len(X_batch[Y_batch==0])))
        



ValueError: Graph disconnected: cannot obtain value for tensor KerasTensor(type_spec=TensorSpec(shape=(None, 2), dtype=tf.float32, name='input_1'), name='input_1', description="created by layer 'input_1'") at layer "dense_10". The following previous layers were accessed without issue: []