In [1]:
import numpy as np
from numpy.random import default_rng
import matplotlib.pyplot as plt
import energyflow as ef
import energyflow.archs
from energyflow.archs import PFN
from matplotlib import gridspec
import matplotlib.lines as mlines
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Layer, concatenate
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
from tensorflow.keras.layers import BatchNormalization
import corner


plt.rc('font', size=20)

In [2]:
rng = default_rng()
n_dim = 2
gauss_data = rng.multivariate_normal(np.zeros(n_dim), np.identity(n_dim), 100000)
gauss_sim = rng.multivariate_normal(np.ones(n_dim), np.identity(n_dim), 100000)

## One Moment

In [6]:
class MyLayer(Layer):

    def __init__(self, **kwargs):
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        self._lambda0 = self.add_weight(name='lambda0', 
                                    shape=(n_dim, 1),
                                    initializer = tf.keras.initializers.RandomUniform(minval=-5., maxval=5.), 
                                    trainable=True)
        super(MyLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        #return tf.exp(self._lambda1 * x + self._lambda0)
        return tf.exp(x @ self._lambda0)

mymodel_inputtest = Input(shape=(n_dim,))
mymodel_test = MyLayer()(mymodel_inputtest)
model_generator = Model(mymodel_inputtest, mymodel_test)

inputs_disc = Input((n_dim, ))
hidden_layer_1_disc = Dense(50, activation='relu')(inputs_disc)
hidden_layer_2_disc = Dense(50, activation='relu')(hidden_layer_1_disc)
hidden_layer_3_disc = Dense(50, activation='relu')(hidden_layer_2_disc)
outputs_disc = Dense(1, activation='sigmoid')(hidden_layer_3_disc)
model_discrimantor = Model(inputs=inputs_disc, outputs=outputs_disc)

def weighted_binary_crossentropy(y_true, y_pred):
    weights = tf.gather(y_true, [1], axis=1) # event weights
    y_true = tf.gather(y_true, [0], axis=1) # actual y_true for loss
    
    weights_1 = K.sum(y_true*weights)
    weights_0 = K.sum((1-y_true)*weights)
    
    # Clip the prediction value to prevent NaN's and Inf's
    epsilon = K.epsilon()
    y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
    t_loss = -weights * ((y_true) * K.log(y_pred)/weights_1 +
                         (1 - y_true) * K.log(1 - y_pred)/weights_0)
    return K.mean(t_loss)

model_discrimantor.compile(loss=weighted_binary_crossentropy, optimizer='adam')

def weighted_binary_crossentropy_GAN(y_true, y_pred):
    weights = tf.gather(y_pred, [1], axis=1) # event weights
    y_pred = tf.gather(y_pred, [0], axis=1) # actual y_pred for loss
    
    weights_1 = K.sum(y_true*weights)
    weights_0 = K.sum((1-y_true)*weights)
    
    #tf.print("weights",weights_0,weights_1)
    
    # Clip the prediction value to prevent NaN's and Inf's
    epsilon = K.epsilon()
    y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
    t_loss = weights * ((1 - y_true) * K.log(1 - y_pred)/weights_0)
    return K.mean(t_loss)
    
model_discrimantor.trainable = False
mymodel_gan = Input(shape=(n_dim,))
gan_model = Model(inputs=mymodel_gan,outputs=concatenate([model_discrimantor(mymodel_gan),model_generator(mymodel_gan)]))

gan_model.compile(loss=weighted_binary_crossentropy_GAN, optimizer='adam')

In [7]:
xvals_1 = np.concatenate([gauss_data,gauss_sim])
yvals_1 = np.concatenate([np.ones(len(gauss_data)),np.zeros(len(gauss_sim))])


X_train_1, X_test_1, Y_train_1, Y_test_1 = train_test_split(xvals_1, yvals_1)

b = 100
n_epochs = b
n_batch = 128*b
n_batches = len(X_train_1) // n_batch

for i in range(n_epochs):
    mylambda0 = model_generator.layers[-1].get_weights()
    if i%(b//10)==0:
        print("on epoch =", i, np.array(mylambda0))
    #print("  ",np.sum(model_generator.predict(X_train_1,batch_size=1000)))
    for j in range(n_batches):
        X_batch = X_train_1[j*n_batch:(j+1)*n_batch]
        Y_batch = Y_train_1[j*n_batch:(j+1)*n_batch]
        W_batch = model_generator(X_batch)
        W_batch = np.array(W_batch).flatten()
        W_batch[Y_batch==1] = 1
        #W_batch[Y_batch==0] = 1
        
        Y_batch_2 = np.stack((Y_batch, W_batch), axis=1)
        
        model_discrimantor.train_on_batch(X_batch, Y_batch_2)
        
        #print("      ",j,np.sum(model_generator.predict(X_batch,batch_size=1000)),np.log(model_generator.predict([1.]))-np.log(model_generator.predict([0.])),np.log(model_generator.predict([0.])))
        
        gan_model.train_on_batch(X_batch[Y_batch==0],np.zeros(len(X_batch[Y_batch==0])))

on epoch = 0 [[[-1.2789392]
  [ 3.8647003]]]
on epoch = 10 [[[-1.3335301]
  [ 3.7667277]]]
on epoch = 20 [[[-1.3738631]
  [ 3.6364532]]]
on epoch = 30 [[[-1.4080834]
  [ 3.5056667]]]
on epoch = 40 [[[-1.4378322]
  [ 3.3702831]]]
on epoch = 50 [[[-1.4625871]
  [ 3.2300935]]]
on epoch = 60 [[[-1.478641 ]
  [ 3.0858252]]]
on epoch = 70 [[[-1.4821802]
  [ 2.93733  ]]]
on epoch = 80 [[[-1.4703212]
  [ 2.7841988]]]
on epoch = 90 [[[-1.4413182]
  [ 2.626203 ]]]


In [8]:
lambda0 = np.array(model_generator.layers[-1].get_weights()).reshape(n_dim)
weights_1 = np.concatenate([np.ones(len(gauss_data)),np.exp(gauss_sim @ lambda0)*len(gauss_data)/np.sum(np.exp(gauss_sim @ lambda0))])

X_train_1, X_test_1, Y_train_1, Y_test_1, w_train_1, w_test_1 = train_test_split(xvals_1, yvals_1, weights_1)

print("Data", np.mean(X_test_1[Y_test_1==1], axis=0))
print("Sim", np.mean(X_test_1[Y_test_1==0], axis=0))
print("Weighted Sim", np.average(X_test_1[Y_test_1==0],weights=w_test_1[Y_test_1==0], axis=0))

Data [0.00838812 0.00728141]
Sim [1.010666   0.99926261]
Weighted Sim [-0.97098688  3.63861248]


## Two Moments

In [6]:
class MyLayer(Layer):

    def __init__(self, **kwargs):
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        self._lambda0 = self.add_weight(name='lambda0', 
                                    shape=(n_dim, 1),
                                    initializer = tf.keras.initializers.Constant(0.1),
                                        #tf.keras.initializers.RandomUniform(minval=-5., maxval=5.), 
                                    trainable=True)
        self._lambda1 = self.add_weight(name='lambda1', 
                            shape=(n_dim, n_dim),
                            initializer = tf.keras.initializers.Constant(0.1),
                                        #tf.keras.initializers.RandomUniform(minval=-5., maxval=5.), 
                            trainable=True)
        super(MyLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
#         print(f"{x.shape = }")
#         print(f"{self._lambda0.shape = }")
#         print(f"{self._lambda1.shape = }")
        Term1 = x @ self._lambda0
        Term2 = tf.linalg.diag_part(x @ self._lambda1 @ tf.transpose(x))
        Term2 = tf.expand_dims(Term2, -1)
#         print(f"{Term1.shape = }")
#         print(f"{Term2.shape = }")
#         print(f"{(Term1 + Term2).shape = }")
        
        return tf.exp(Term1 + Term2)

mymodel_inputtest = Input(shape=(n_dim,))
mymodel_test = MyLayer()(mymodel_inputtest)
model_generator = Model(mymodel_inputtest, mymodel_test)

inputs_disc = Input((n_dim, ))
hidden_layer_1_disc = Dense(50, activation='relu')(inputs_disc)
hidden_layer_2_disc = Dense(50, activation='relu')(hidden_layer_1_disc)
hidden_layer_3_disc = Dense(50, activation='relu')(hidden_layer_2_disc)
outputs_disc = Dense(1, activation='sigmoid')(hidden_layer_3_disc)
model_discrimantor = Model(inputs=inputs_disc, outputs=outputs_disc)

def weighted_binary_crossentropy(y_true, y_pred):
    weights = tf.gather(y_true, [1], axis=1) # event weights
    y_true = tf.gather(y_true, [0], axis=1) # actual y_true for loss
    
    weights_1 = K.sum(y_true*weights)
    weights_0 = K.sum((1-y_true)*weights)
    
    # Clip the prediction value to prevent NaN's and Inf's
    epsilon = K.epsilon()
    y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
    t_loss = -weights * ((y_true) * K.log(y_pred)/weights_1 +
                         (1 - y_true) * K.log(1 - y_pred)/weights_0)
    return K.mean(t_loss)

model_discrimantor.compile(loss=weighted_binary_crossentropy, optimizer='adam')

def weighted_binary_crossentropy_GAN(y_true, y_pred):
    weights = tf.gather(y_pred, [1], axis=1) # event weights
    y_pred = tf.gather(y_pred, [0], axis=1) # actual y_pred for loss
    
    weights_1 = K.sum(y_true*weights)
    weights_0 = K.sum((1-y_true)*weights)
    
    #tf.print("weights",weights_0,weights_1)
    
    # Clip the prediction value to prevent NaN's and Inf's
    epsilon = K.epsilon()
    y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
    t_loss = weights * ((1 - y_true) * K.log(1 - y_pred)/weights_0)
    return K.mean(t_loss)
    
model_discrimantor.trainable = False
mymodel_gan = Input(shape=(n_dim,))
gan_model = Model(inputs=mymodel_gan,outputs=concatenate([model_discrimantor(mymodel_gan),model_generator(mymodel_gan)]))

gan_model.compile(loss=weighted_binary_crossentropy_GAN, optimizer='adam')

In [7]:
xvals_1 = np.concatenate([gauss_data,gauss_sim])
yvals_1 = np.concatenate([np.ones(len(gauss_data)),np.zeros(len(gauss_sim))])


X_train_1, X_test_1, Y_train_1, Y_test_1 = train_test_split(xvals_1, yvals_1)

b = 100
n_epochs = b
n_batch = 128*b
n_batches = len(X_train_1) // n_batch

for i in range(n_epochs):
    mylambda0 = model_generator.layers[-1].get_weights()[0]
    mylambda1 = model_generator.layers[-1].get_weights()[1]
    if i%(b//10)==0:
        print("on epoch =", i)
        print(np.array(mylambda0))
        print(np.array(mylambda1))
    #print("  ",np.sum(model_generator.predict(X_train_1,batch_size=1000)))
    for j in range(n_batches):
        X_batch = X_train_1[j*n_batch:(j+1)*n_batch]
        #print(f"{X_batch.shape = }")
        Y_batch = Y_train_1[j*n_batch:(j+1)*n_batch]
        #print(f"{Y_batch.shape = }")
        W_batch = model_generator(X_batch)
        #print(f"Pre flattening {W_batch.shape = }")
        W_batch = np.array(W_batch).flatten()
        #print(f"Post flattening {W_batch.shape = }")
        W_batch[Y_batch==1] = 1
        W_batch[Y_batch==0] = 1
        
        Y_batch_2 = np.stack((Y_batch, W_batch), axis=1)
        
        model_discrimantor.train_on_batch(X_batch, Y_batch_2)
        
        #print("      ",j,np.sum(model_generator.predict(X_batch,batch_size=1000)),np.log(model_generator.predict([1.]))-np.log(model_generator.predict([0.])),np.log(model_generator.predict([0.])))
        
        gan_model.train_on_batch(X_batch[Y_batch==0],np.zeros(len(X_batch[Y_batch==0])))

on epoch = 0
[[0.1]
 [0.1]
 [0.1]]
[[0.1 0.1 0.1]
 [0.1 0.1 0.1]
 [0.1 0.1 0.1]]
on epoch = 10
[[-0.03484944]
 [-0.03237605]
 [-0.03469536]]
[[-0.00916653 -0.01971382 -0.01996004]
 [-0.01971382 -0.01057663 -0.0175357 ]
 [-0.01996004 -0.01753569 -0.01512844]]
on epoch = 20
[[-0.18040502]
 [-0.17839366]
 [-0.17955777]]
[[-0.10332684 -0.11964564 -0.12141013]
 [-0.11964563 -0.10747249 -0.11440329]
 [-0.1214101  -0.11440329 -0.11044312]]
on epoch = 30
[[-0.3046459 ]
 [-0.30497426]
 [-0.30403447]]
[[-0.17687069 -0.18710117 -0.19073603]
 [-0.18710114 -0.17764111 -0.18283243]
 [-0.190736   -0.1828324  -0.18066981]]
on epoch = 40
[[-0.41755444]
 [-0.4201348 ]
 [-0.4177858 ]]
[[-0.23552282 -0.23705713 -0.24261133]
 [-0.2370571  -0.2304167  -0.23569284]
 [-0.2426113  -0.23569281 -0.23234229]]
on epoch = 50
[[-0.52409476]
 [-0.5293728 ]
 [-0.52599657]]
[[-0.28312063 -0.27498236 -0.28266442]
 [-0.27498233 -0.2699781  -0.27788344]
 [-0.2826644  -0.27788338 -0.26927266]]
on epoch = 60
[[-0.6266938 ]


In [8]:
lambda0 = np.array(model_generator.layers[-1].get_weights()[0]).reshape(n_dim)
lambda1 = np.array(model_generator.layers[-1].get_weights()[1]).reshape((n_dim, n_dim))
A = (gauss_sim @ lambda1 @ gauss_sim.T).diagonal()
weights_1 = np.concatenate([np.ones(len(gauss_data)),np.exp(gauss_sim @ lambda0 + A)*len(gauss_data)/np.sum(np.exp(gauss_sim @ lambda0 + A))])

X_train_1, X_test_1, Y_train_1, Y_test_1, w_train_1, w_test_1 = train_test_split(xvals_1, yvals_1, weights_1)

In [19]:
print("Mean:")
print("Data", np.mean(X_test_1[Y_test_1==1], axis=0))
print("Sim", np.mean(X_test_1[Y_test_1==0], axis=0))
print("Weighted Sim", np.average(X_test_1[Y_test_1==0],weights=w_test_1[Y_test_1==0], axis=0))

Mean:
Data [-0.0121952  -0.00516054 -0.00771615]
Sim [0.99839323 0.9957826  1.00157077]
Weighted Sim [ 0.00305805 -0.00293258 -0.0696174 ]


In [23]:
print("\n\n\nVariance:")
print("Data", np.round(np.mean([[X_test_1[Y_test_1==1][:, i]*X_test_1[Y_test_1==1][:, j] for i in range(3)] for j in range(3)], axis=-1), 2))
print("Sim", np.round(np.mean([[X_test_1[Y_test_1==0][:, i]*X_test_1[Y_test_1==0][:, j] for i in range(3)] for j in range(3)], axis=-1), 2))
print("Weighted Sim", np.round(np.average([[X_test_1[Y_test_1==0][:, i]*X_test_1[Y_test_1==0][:, j] for i in range(3)] for j in range(3)], axis=-1,weights=w_test_1[Y_test_1==0]), 2))




Variance:
Data [[ 1.   -0.01  0.  ]
 [-0.01  1.   -0.  ]
 [ 0.   -0.    1.  ]]
Sim [[2.01 0.99 1.01]
 [0.99 1.98 1.  ]
 [1.01 1.   2.02]]
Weighted Sim [[ 0.7  -0.21 -0.23]
 [-0.21  0.86 -0.3 ]
 [-0.23 -0.3   0.97]]
