In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
import energyflow as ef
import energyflow.archs
from energyflow.archs import PFN
from matplotlib import gridspec
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Layer, concatenate
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
from tensorflow.keras.layers import BatchNormalization

# Global matplotlib styling applied to every figure in the notebook.
plt.rc('font', size=20)
plt.rcParams["font.family"] = "serif"

# Restrict TensorFlow to a single GPU; this is set before the device query below.
os.environ["CUDA_VISIBLE_DEVICES"] = "1" # pick a number < 4 on ML4HEP
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    # Allocate GPU memory on demand instead of reserving it all up front.
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
else:
    # The device list is empty on CPU-only hosts, or when the index chosen
    # above does not exist; indexing it would raise IndexError.
    print("No GPU visible to TensorFlow; running on CPU.")

In [3]:
# Z+jets samples used in the OmniFold paper (https://arxiv.org/abs/1911.09107);
# see https://energyflow.network/docs/datasets/ for details.
# Pythia and Herwig are two generators: one is treated here as the "simulation"
# and the other as the "data".
datasets = {
    generator: ef.zjets_delphes.load(generator, num_data=1000000)
    for generator in ('Pythia26', 'Herwig')
}



In [9]:
# Jet widths. "gen" = particle level, "sim" = detector level.
# Unsuffixed names come from Pythia26, the *_alt names from Herwig.
w_true, w_reco = datasets['Pythia26']['gen_widths'], datasets['Pythia26']['sim_widths']
w_true_alt, w_reco_alt = datasets['Herwig']['gen_widths'], datasets['Herwig']['sim_widths']

# First column of the per-jet arrays, at particle ("gen") and detector ("sim") level.
# NOTE(review): column 0 is presumably the jet pT - confirm against the EnergyFlow dataset docs.
p_gen, p_sim = datasets['Pythia26']['gen_jets'][:,0], datasets['Pythia26']['sim_jets'][:,0]
p_truth, p_data = datasets['Herwig']['gen_jets'][:,0], datasets['Herwig']['sim_jets'][:,0]

In [10]:
def weighted_binary_crossentropy(y_true, y_pred):
    """Event-weighted binary cross-entropy with per-class weight normalisation.

    Args:
        y_true: two-column tensor; column 0 holds the 0/1 class label and
            column 1 the per-event weight.
        y_pred: predicted probability of class 1.

    Returns:
        Scalar tensor: the mean over the batch of the weighted cross-entropy,
        where each class's term is divided by that class's summed weight
        in the batch.
    """
    # Unpack the label and the event weight that share the y_true tensor.
    labels = tf.gather(y_true, [0], axis=1)
    event_w = tf.gather(y_true, [1], axis=1)

    # Total weight carried by each class in this batch.
    norm_pos = K.sum(labels*event_w)
    norm_neg = K.sum((1-labels)*event_w)

    # Keep predictions strictly inside (0, 1) so the logs stay finite.
    eps = K.epsilon()
    probs = K.clip(y_pred, eps, 1. - eps)

    pos_term = labels * K.log(probs)/norm_pos
    neg_term = (1 - labels) * K.log(1 - probs)/norm_neg
    return K.mean(-event_w * (pos_term + neg_term))

def weighted_binary_crossentropy_GAN(y_true, y_pred):
    """Generator-side loss for the adversarial reweighting model.

    Only the class-0 term of the weighted cross-entropy is kept, and it enters
    with a positive sign (the discriminator loss uses the negative sign).

    Args:
        y_true: 0/1 class labels; only events with label 0 contribute.
        y_pred: two-column tensor; column 0 is the discriminator output and
            column 1 the per-event weight produced by the generator.

    Returns:
        Scalar tensor: mean over the batch of
        ``weights * (1 - y_true) * log(1 - p) / sum((1 - y_true) * weights)``.
    """
    weights = tf.gather(y_pred, [1], axis=1) # event weights
    y_pred = tf.gather(y_pred, [0], axis=1) # actual y_pred for loss

    # Summed weight of the class-0 events; the class-1 sum is not needed
    # because no class-1 term appears in this loss.
    weights_0 = K.sum((1-y_true)*weights)

    # Clip the prediction value to prevent NaN's and Inf's
    epsilon = K.epsilon()
    y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
    t_loss = weights * ((1 - y_true) * K.log(1 - y_pred)/weights_0)
    return K.mean(t_loss)

In [None]:
# Adversarial moment-reweighting study.
# For each n, a generator/discriminator pair is trained on the particle-level
# widths; the fitted generator parameters are turned into per-event weights
# exp(sum_k lambda_k * w_true**(k+1)) for the Pythia sample, and the weighted
# detector-level moments are compared with the Herwig ones.
#
# NOTE(review): `n`, `N`, `MyLayer`, `errors_weighted` and `errors_unweighted`
# are not defined in this cell. They must already exist in the kernel (defined
# in an earlier cell), otherwise this cell raises NameError on a fresh kernel.

# Label 1 = Herwig ("data"), label 0 = Pythia ("simulation").
# *_1 arrays are particle level, *_2 arrays are detector level.
xvals_1 = np.concatenate([w_true_alt,w_true])
yvals_1 = np.concatenate([np.ones(len(w_true_alt)),np.zeros(len(w_true))])

xvals_2 = np.concatenate([w_reco_alt,w_reco])
yvals_2 = np.concatenate([np.ones(len(w_reco_alt)),np.zeros(len(w_reco))])

# Split ONCE with a fixed seed and keep the row indices of each half.
# Re-splitting with a fresh shuffle after training would put events the model
# was trained on into the "test" half used for the moment comparison.
SPLIT_SEED = 0
(X_train_1, X_test_1, Y_train_1, Y_test_1,
 X_train_2, X_test_2, Y_train_2, Y_test_2,
 idx_train, idx_test) = train_test_split(xvals_1, yvals_1, xvals_2, yvals_2,
                                         np.arange(len(xvals_1)),
                                         random_state=SPLIT_SEED)

myc = 0.1
mymodel_inputtest = Input(shape=(1,))

# Training schedule (identical for every n).
n_epochs = 10
n_batch = 128*10
n_batches = len(X_train_1) // n_batch

# Test-set quantities that do not depend on the learned weights.
sim_test_mask = Y_test_2 == 0
x_data_test = X_test_2[Y_test_2 == 1]
x_sim_test = X_test_2[sim_test_mask]
data_moments = np.array([np.mean(x_data_test**(k+1)) for k in range(N)])
sim_moments = np.array([np.mean(x_sim_test**(k+1)) for k in range(N)])

while n < N:
    print(f"{n = }")

    # Fresh generator for this n.
    # NOTE(review): MyLayer is defined elsewhere; it presumably reads `n` to
    # decide how many parameters to fit - confirm.
    mymodel_test = MyLayer(myc)(mymodel_inputtest)
    model_generator = Model(mymodel_inputtest, mymodel_test)

    # Fresh discriminator: three 50-unit ReLU layers and a sigmoid output.
    inputs_disc = Input((1, ))
    hidden_layer_1_disc = Dense(50, activation='relu')(inputs_disc)
    hidden_layer_2_disc = Dense(50, activation='relu')(hidden_layer_1_disc)
    hidden_layer_3_disc = Dense(50, activation='relu')(hidden_layer_2_disc)
    outputs_disc = Dense(1, activation='sigmoid')(hidden_layer_3_disc)
    model_discrimantor = Model(inputs=inputs_disc, outputs=outputs_disc)

    model_discrimantor.compile(loss=weighted_binary_crossentropy, optimizer='adam')

    # Freeze the discriminator inside the combined model so that training
    # gan_model only updates the generator.
    model_discrimantor.trainable = False
    mymodel_gan = Input(shape=(1,))
    # Output column 0 = discriminator score, column 1 = generator weight,
    # which is the layout weighted_binary_crossentropy_GAN unpacks.
    gan_model = Model(inputs=mymodel_gan,outputs=concatenate([model_discrimantor(mymodel_gan),model_generator(mymodel_gan)]))

    gan_model.compile(loss=weighted_binary_crossentropy_GAN, optimizer='adam')

    for i in range(n_epochs):
        for j in range(n_batches):
            X_batch = X_train_1[j*n_batch:(j+1)*n_batch]
            Y_batch = Y_train_1[j*n_batch:(j+1)*n_batch]

            # Current generator weights for the batch; "data" events keep weight 1.
            W_batch = model_generator(X_batch)
            W_batch = np.array(W_batch).flatten()
            W_batch[Y_batch==1] = 1

            # The discriminator loss expects (label, weight) pairs as y_true.
            Y_batch_2 = np.stack((Y_batch, W_batch), axis=1)
            model_discrimantor.train_on_batch(X_batch, Y_batch_2)

            # The generator step only sees the simulation (label 0) events.
            X_batch_sim = X_batch[Y_batch==0]
            gan_model.train_on_batch(X_batch_sim, np.zeros(len(X_batch_sim)))

        mylambda = np.array(model_generator.layers[-1].get_weights())
        print("on epoch=",i, mylambda)

    # Per-event weights for the full Pythia sample from the fitted parameters,
    # rescaled so that they sum to the number of Herwig events.
    # NOTE(review): indexing mylambda[:, k] requires mylambda to be 2-D with at
    # least n columns - confirm against MyLayer's weight shape.
    arr = np.array([mylambda[:, k]*w_true**(k+1) for k in range(n)])
    exponent = np.exp(np.sum(arr, axis=0))
    weights_1 = np.concatenate([np.ones(len(w_true_alt)),exponent*len(w_true_alt)/np.sum(exponent)])

    # Reuse the original split so the held-out events stay held out.
    w_train_1, w_test_1 = weights_1[idx_train], weights_1[idx_test]

    w_sim_test = w_test_1[sim_test_mask]
    weighted_moments = np.array([np.average(x_sim_test**(k+1), weights=w_sim_test) for k in range(N)])

    #data, simulation w/o weights, weighted simulation
    for i in range(n):
        print("Moment ", i+1)
        print(data_moments[i], sim_moments[i], weighted_moments[i])

    # Relative deviation of the first N simulated moments from the data moments.
    # NOTE(review): assumes errors_weighted / errors_unweighted have N rows and
    # a column for every value n takes - confirm where they are allocated.
    errors_weighted[:, n] = (weighted_moments - data_moments)/weighted_moments
    errors_unweighted[:, n] = (sim_moments - data_moments)/sim_moments
    n += 1
    print("\n\n\n")