In [None]:
#import the modules 
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from scipy import special
import scipy.stats as stats
from numba import jit
from tqdm.notebook import tqdm
import sys
from pathlib import Path
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_probability as tfp
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from keras import callbacks, optimizers
from keras_tqdm import TQDMNotebookCallback
from tqdm.keras import TqdmCallback
from keras import backend as K
from keras import activations, initializers
from keras.layers import Layer

In [None]:
import warnings

# Silence every Python warning for the remainder of the notebook.
warnings.filterwarnings('ignore')

# Font-size presets used by the matplotlib defaults below.
SMALL_SIZE = 10
MEDIUM_SIZE = 12
BIGGER_SIZE = 22

plt.rc('font', size=MEDIUM_SIZE)  # controls default text sizes
# fontsize of the axes title / of the x and y labels
plt.rc('axes', titlesize=MEDIUM_SIZE, labelsize=SMALL_SIZE)
# plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
# plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
# plt.rc('legend', fontsize=SMALL_SIZE)    # legend fontsize
# plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title


Defining Swish Activation function 

In [None]:
from keras.backend import sigmoid
from keras.utils.generic_utils import get_custom_objects
from keras.layers import Activation

def swish(x, beta = 1):
    """Swish activation: ``x * sigmoid(beta * x)``.

    Parameters
    ----------
    x : tensor
        Pre-activation values.
    beta : scalar, optional
        Multiplier applied to ``x`` inside the sigmoid gate (default 1).

    Returns
    -------
    tensor
        ``x`` multiplied element-wise by its sigmoid gate.
    """
    gate = sigmoid(beta * x)
    return x * gate


# Make the activation available to Keras under the string name 'swish'.
get_custom_objects()['swish'] = Activation(swish)

In [None]:
# Training data with the fermion matrix calculated using exponential discretization
exp_input_file = "Expinputs_2sites_U2.0B4.0Nt16.npy"
exp_target_file = "ExpHex2sitesNt16/targets_2sites_U2.0B4.0Nt16.npy"

# Training data with the fermion matrix calculated using diagonal discretization
dia_input_file = "inputs_2sites_U2.0B4.0Nt16.npy"
dia_target_file = "targets_2sites_U2.0B4.0Nt16.npy"  # diag

dia_x = np.load(dia_input_file)
dia_y = np.load(dia_target_file)

exp_x = np.load(exp_input_file)
exp_y = np.load(exp_target_file)

# The physical parameters are read from the file-name suffix
# "...U<U>B<beta>Nt<Nt>.npy". `input_file` was previously never defined, so the
# slices below raised NameError. Both input files above end in the same suffix,
# so either one can serve as the source.
# NOTE: the fixed-width slices only work for three-character U/beta fields
# (e.g. "2.0") and a two-digit Nt.
input_file = exp_input_file
Nt = float(input_file[-6:-4])
beta = float(input_file[-11:-8])
U = float(input_file[-15:-12])
usqrt = np.sqrt(U*beta/Nt)
print("U  beta  Nt  Usqrt")
print(U,beta,Nt,np.round(usqrt,3))


In [None]:
# data scaling
# Report the (min, max) of the real part of the targets: overall, and separately
# for the first 10000 samples vs. the rest. The variable names and print labels
# suggest the first part is Gaussian-drawn and the remainder HMC-drawn --
# TODO confirm against the data generation.
n_gauss = 10000

# Take the real part once, for BOTH data sets. The diag split previously skipped
# np.real, unlike the exp split and unlike the overall diag range printed below.
exp_force = np.real(exp_y)
diag_force = np.real(dia_y)

exp_gaus = exp_force[:n_gauss]
exp_HMC = exp_force[n_gauss:]
diag_gaus = diag_force[:n_gauss]
diag_HMC = diag_force[n_gauss:]

print("(min,max) force(exp)",(np.min(exp_force),np.max(exp_force)),
      "\n(min,max) force(diag)",(np.min(diag_force),np.max(diag_force)),
      "\n(min,max) force Gauss exp",(np.min(exp_gaus),np.max(exp_gaus)),
      "\n(min,max) force Gauss diag",(np.min(diag_gaus),np.max(diag_gaus)),
      "\n(min,max) force HMC exp",(np.min(exp_HMC),np.max(exp_HMC)),
      "\n(min,max) force HMC diag",(np.min(diag_HMC),np.max(diag_HMC)))
     

In [None]:
# Removing the outliers and preparing the data
def scalingData(a,b,threshold):
    """Drop outlier samples and scatter-plot the remaining data.

    Sample ``i`` is kept when the largest absolute real part found in ``b[i]``
    is strictly below ``threshold``. Despite the name, no rescaling is applied;
    the function only filters.

    Parameters
    ----------
    a : array-like
        Inputs, indexed by sample along the first axis.
    b : array-like
        Targets, indexed by sample along the first axis (may be complex).
    threshold : float
        Exclusive upper bound on ``max(|Re(b[i])|)`` for a sample to be kept.

    Returns
    -------
    data_x, data_y : np.ndarray
        The rows of ``a`` and ``b`` that passed the filter. Trailing dimensions
        are preserved even when no sample passes.

    Side effects
    ------------
    Prints the shapes of the filtered arrays and shows a scatter plot of the
    real parts of the filtered targets against the filtered inputs.
    """
    a = np.asarray(a)
    b = np.asarray(b)

    # Largest |Re(b)| per sample: reduce over every axis except the sample axis.
    magnitude = np.abs(np.real(b))
    per_sample_max = magnitude.max(axis=tuple(range(1, magnitude.ndim)))

    # Integer indices (not a boolean mask) so that, as before, only the kept
    # rows of `a` are ever accessed.
    kept = np.flatnonzero(per_sample_max < threshold)
    data_x = a[kept]
    data_y = b[kept]

    print("inputs=",data_x.shape,"targets=",data_y.shape)
    plt.title("Targets vs inputs ")
    plt.xlabel("x")
    plt.ylabel("grad(x)")
    # Plot real parts explicitly rather than relying on an implicit complex cast.
    plt.scatter(np.real(data_x),np.real(data_y))
#     plt.savefig("plots/scaleddataDIA.png")
    plt.show()

    return data_x,data_y



In [None]:
# Filter the exponential-discretization data: keep samples with max |Re(target)| < 50.
x_scaledExp, y_scaledExp = scalingData(a=exp_x, b=exp_y, threshold=50.0)


In [None]:
# Filter the diagonal-discretization data: keep samples with max |Re(target)| < 50.
x_scaledDiag, y_scaledDiag = scalingData(a=dia_x, b=dia_y, threshold=50.0)

In [None]:
# (min, max) of the real part of the targets after outlier removal.
exp_scaled_range = (np.min(np.real(y_scaledExp)), np.max(np.real(y_scaledExp)))
diag_scaled_range = (np.min(np.real(y_scaledDiag)), np.max(np.real(y_scaledDiag)))
print("(min,max) force(exp)", exp_scaled_range,
      "\n(min,max) force(diag)", diag_scaled_range)
      

In [None]:
def plotsData(x,y,c):
    """Scatter per-sample summary statistics of inputs against those of targets.

    For each sample ``i`` in ``range(len(x))`` four numbers are computed: the
    mean of ``x[i]``, the sum of ``|x[i]|**2``, the mean of ``y[i]`` and the sum
    of ``|y[i]|**2``. They are shown as a 2x2 grid of scatter plots.

    Parameters
    ----------
    x : np.ndarray
        Inputs, one sample per row.
    y : np.ndarray
        Targets, one sample per row.
    c : int
        ``1`` labels the figure "DIA"; any other value labels it "EXP".

    Returns
    -------
    mean_phi, abs_phi, mean_grad, abs_grad : list
        The four per-sample statistics, in that order.

    Notes
    -----
    Reads the notebook globals ``U``, ``beta`` and ``Nt`` for the figure title.
    """
    sample_ids = range(len(x))
    mean_phi = [np.mean(x[i]) for i in sample_ids]
    abs_phi = [np.sum(np.abs(x[i])**2) for i in sample_ids]
    mean_grad = [np.mean(y[i]) for i in sample_ids]
    abs_grad = [np.sum(np.abs(y[i])**2) for i in sample_ids]

    # (x values, y values, title, x label, y label) for each panel, row-major.
    panels = [
        (mean_phi, mean_grad,
         " Mean Gradient vs  Mean Phi", "mean[phi]", "mean[grad(phi)]"),
        (mean_phi, abs_grad,
         " Absolute Gradient Square vs  Mean Phi", "mean[phi]", "Sum[Abs(grad(phi))^2]"),
        (abs_phi, mean_grad,
         " Mean Gradient vs Absolute Phi Square", "Sum[Abs(phi)^2)]", "mean[grad(phi)]"),
        (abs_phi, abs_grad,
         "Absolute Gradient Squaret vs Absolute Phi Square", "Sum[Abs(phi)^2)]", "Sum[Abs(grad(phi))^2]"),
    ]

    fig,ax = plt.subplots(2,2,figsize=(15,10))
    for axx, (xs, ys, title, xlabel, ylabel) in zip(ax.flat, panels):
        axx.scatter(xs,ys)
        axx.set_title(title)
        axx.set_xlabel(xlabel)
        axx.set_ylabel(ylabel)

    template = ('U={}, beta={}, Nt={}\nsamples(DIA)={}' if c == 1
                else 'U={}, beta={}, Nt={}\nsamples(EXP)={}')
    plt.suptitle(template.format(U,beta,Nt,x.shape[0]),fontsize=20)
#     plt.savefig("plots/meanAbsDia.png")
    plt.show()

    return mean_phi,abs_phi,mean_grad,abs_grad


In [None]:
# Summary statistics and scatter plots for the filtered EXP data (c=0 -> "EXP" title).
mean_phiExp, abs_phiExp, mean_gradExp, abs_gradExp = plotsData(
    x=np.real(x_scaledExp), y=np.real(y_scaledExp), c=0)

In [None]:
# Summary statistics and scatter plots for the filtered DIA data (c=1 -> "DIA" title).
mean_phiDiag, abs_phiDiag, mean_gradDiag, abs_gradDiag = plotsData(
    x=np.real(x_scaledDiag), y=np.real(y_scaledDiag), c=1)

In [None]:
# Histograms of the per-sample gradient statistics: EXP on the top row, DIA below.
# Each entry is (values, histogram range, panel title), in row-major panel order.
hist_panels = [
    (mean_gradExp, (-5,5), "Mean Gradient(EXP)"),
    (abs_gradExp, (0,900), " Absolute Gradient Square (EXP)"),
    (mean_gradDiag, (-5,5), "Mean Gradient(DIA)"),
    (abs_gradDiag, (0,900), " Absolute Gradient Square (DIA)"),
]

fig,ax = plt.subplots(2,2,figsize=(15,10))
for axx, (values, value_range, panel_title) in zip(ax.flat, hist_panels):
    axx.hist(values,bins=50,range=value_range)
    axx.set_title(panel_title)

# plt.savefig("plots/hist.png")
plt.show()

In [None]:
# fitting the prior
# One density histogram per target component (samples from index 10000 onward),
# each overlaid with a zero-mean normal pdf of scale 1.9 multiplied by 1.3.
# NOTE(review): 1.9 and 1.3 look hand-tuned to the histograms -- confirm.

q = np.linspace(-30,30,50)
prior_curve = stats.norm.pdf(q, 0.,1.9)*1.3  # same for every panel, so computed once
late_forces = np.real(exp_y[10000:])

fig,ax = plt.subplots(8,4,figsize=(8,12))
# ax.flat walks the 8x4 grid row by row, so panel k shows component k.
for k, axk in enumerate(ax.flat):
    axk.hist(late_forces[:,k],bins='auto',density=True)
    axk.plot(q, prior_curve,'k-')
    axk.set_title(f"k{k}")
# Lay out AFTER the panels have content and titles; calling tight_layout() on the
# empty grid cannot reserve room for the titles.
fig.tight_layout()
# plt.savefig("prior_1.png")
plt.show()

# for i in range(3):
#     for j in range(3):
#         counts, bins = np.histogram(np.real(vy[10000:,k]),bins='auto')
#         ax[i][j].hist(np.real(vy[10000:,k]),bins=bins,density=True)
#         ax[i][j].plot(q, stats.norm.pdf(q, 0.,1.9)*1.3,'k-')
#         ax[i][j].set_title(f"k{k}")
#         k = k+1
#         if k ==32:
#             break

# # plt.show()
# # plt.title("Gradient(phi) element-wise EXP")


# # for i in range(32):
# #     plt.plot(q, stats.norm.pdf(q,0,1.9)*1.4,'k-')
# #     counts, bins = np.histogram(np.real(vy[10000:,i]),bins='auto')
# #     plt.hist(np.real(vy[10000:,i]),bins=bins,density=True)
   
# plt.savefig("prior_2.png")




In [None]:
#plotting all the elements together
# Pool every component into a single 1-D sample before histogramming. Handing the
# 2-D array to plt.hist directly treats each column as its own data set and draws
# one group of bars per column instead of one pooled distribution.
# `q` is the evaluation grid defined in the prior-fitting cell above.
pooled_forces = np.real(exp_y[10000:]).ravel()
plt.hist(pooled_forces,bins='auto',density=True)
plt.plot(q, stats.norm.pdf(q, 0.,1.9)*1.36,'k-')
plt.grid()
plt.show()

In [None]:
# Overlay the density histograms of the first 32 target components (DIA data)
# on a single set of axes.
hist_axes = plt.gca()
for component in range(32):
    hist_axes.hist(y_scaledDiag[:,component],density=True)

hist_axes.set_title("Gradient(phi) element-wise Diag")
# plt.savefig("plots/gradHistDia.png")
plt.show()


In [None]:
# Hold out 10% of the filtered EXP data for validation. The split is shuffled, so
# a fixed seed is needed for the partition (and everything trained on it) to be
# reproducible after a kernel restart.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    np.real(x_scaledExp), np.real(y_scaledExp),
    test_size=0.1, shuffle=True, random_state=SPLIT_SEED)
# X_train = np.real(x_scaledExp)
# y_train = np.real(y_scaledExp)
# X_test = np.real(x_scaledDiag)
# y_test = np.real(y_scaledDiag)
print("Train data size=",X_train.shape,"\t Test data size=",X_test.shape)

In [None]:
# num_samples = 10000
# trial_data = np.zeros((num_samples,xx.shape[1])) # data for testing the NN
# for i in range(num_samples): 
#     trial_data[i,:] = np.random.normal(0,usqrt,xx.shape[1])

# code 1

In [None]:
# def priorOld(kernel_size, bias_size=0, dtype=None):
#     n = kernel_size + bias_size
#     prior_model = keras.Sequential(
#         [
#             tfp.layers.DistributionLambda(
#                 lambda t: tfp.distributions.MultivariateNormalDiag(
#                     loc=tf.zeros(n), scale_diag=tf.ones(n)*usqrt
#                 )
#             )
#         ]
#     )
#     return prior_model

# def posteriorOld(kernel_size, bias_size=0, dtype=None):
#     n = kernel_size + bias_size
#     posterior_model = keras.Sequential(
#         [
#             tfp.layers.VariableLayer(
#                 tfp.layers.MultivariateNormalTriL.params_size(n), dtype=dtype
#             ),
#             tfp.layers.MultivariateNormalTriL(n),
#         ]#activity_regularizer=tfpl.KLDivergenceRegularizer(prior, weight=1.0)
#     )
#     return posterior_model



# code 2

For numerical stability the network is parametrized with $\rho$ instead of $\sigma$ directly; $\rho$ is transformed to $\sigma$ with the softplus function (first equation below).
The **posterior** means $\mu$ and the parameters $\rho$ are trainable; they are initialized from a zero-mean Gaussian and to zero, respectively.
The **prior $p(w)$** is a mixture of two zero-mean Gaussians (second equation below):

\begin{equation}
\sigma = \log(1 + e^{\rho}) = \mathbf{softplus(\rho)}
\end{equation}

\begin{equation}
\mathbf{p(w)}= \pi\mathbf{\operatorname{N}(w| 0,\sigma^{2}_{1})} + (1-\pi)\mathbf{\operatorname{N}(w| 0,\sigma^{2}_{2})}
\end{equation}


In [None]:
# def mixture_prior_params(sigma_1, sigma_2, pi, return_sigma=False):
#     params = K.variable([sigma_1, sigma_2, pi], name='mixture_prior_params')
#     sigma = np.sqrt(pi * sigma_1 ** 2 + (1 - pi) * sigma_2 ** 2)
#     return params, sigma
  
# def log_mixture_prior_prob(w):
#     comp_1_dist = tf.distributions.Normal(0.0, prior_params[0])
#     comp_2_dist = tf.distributions.Normal(0.0, prior_params[1])
#     comp_1_weight = prior_params[2]
#     return K.log(comp_1_weight * comp_1_dist.prob(w) + (1 - comp_1_weight) * comp_2_dist.prob(w))
  
# # Mixture prior parameters shared across DenseVariational layer instances
# prior_params, prior_sigma = mixture_prior_params(sigma_1=1.0, sigma_2=usqrt, pi=0.0)
# tfd = tfp.distributions
# def  posterior(kernel_size, bias_size=0, dtype=None):
#     n = kernel_size + bias_size
#     c = np.log(np.expm1(1.))  # log(1.7182)= 0.54132
#     return tf.keras.Sequential([
#         tfp.layers.VariableLayer(2 * n, dtype=dtype),
#         tfp.layers.DistributionLambda(lambda t: tfd.Independent(
#             tfd.Normal(loc=t[..., :n],
#                        scale=1e-5 + tf.nn.softplus(c + t[..., n:])),
#             reinterpreted_batch_ndims=1)),
#     ])


# # Specify the prior over `keras.layers.Dense` `kernel` and `bias`.
# def prior(kernel_size, bias_size=0, dtype=None):
#     n = kernel_size + bias_size
#     return tf.keras.Sequential([
#         tfp.layers.VariableLayer(n, dtype=dtype),
#         tfp.layers.DistributionLambda(lambda t: tfd.Independent(
#             tfd.Normal(loc=t, scale=1.),
#             reinterpreted_batch_ndims=1)),
#     ])

# code 3

In [None]:
class DenseVariational(Layer):
    """Dense layer whose kernel and bias are sampled from a learned Gaussian.

    Every forward pass draws ``w = mu + softplus(rho) * eps`` with
    ``eps ~ N(0, 1)`` for both kernel and bias, and registers (via ``add_loss``)
    a single-sample estimate of the KL term,
    ``kl_weight * sum(log q(w) - log p(w))``.

    The prior ``p(w)`` is ``pi_1 * N(0, prior_sigma_1) + pi_2 * N(0, prior_sigma_2)``
    with ``pi_1 = prior_pi``.

    NOTE(review): ``pi_2`` is hard-coded to 0.0, so only the first component
    contributes, and with the default ``prior_pi=1.1`` the prior is not
    normalized. Confirm this is intended rather than ``pi_2 = 1 - prior_pi``.

    Parameters
    ----------
    units : int
        Number of output units.
    kl_weight : float
        Factor applied to the KL term added to the layer losses.
    activation : str or callable, optional
        Activation resolved through ``activations.get``.
    prior_sigma_1, prior_sigma_2 : float
        Standard deviations of the two prior components.
    prior_pi : float
        Weight of the first prior component.
    **kwargs
        Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self,
                 units,
                 kl_weight,
                 activation=None,
                 prior_sigma_1=1.707,
                 prior_sigma_2=1.0,
                 prior_pi=1.1, **kwargs):
        # Initialise the base Layer before assigning any attributes on `self`.
        super().__init__(**kwargs)
        self.units = units
        self.kl_weight = kl_weight
        self.activation = activations.get(activation)
        self.prior_sigma_1 = prior_sigma_1
        self.prior_sigma_2 = prior_sigma_2
        self.prior_pi_1 = prior_pi
        self.prior_pi_2 = 0.0
        # Standard deviation used to initialise kernel_mu and bias_mu.
        self.init_sigma = 1.8
#         self.init_sigma = np.sqrt(self.prior_pi_1 * self.prior_sigma_1 ** 2 +
#                                   self.prior_pi_2 * self.prior_sigma_2 ** 2)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized."""
        config = super().get_config()
        config.update({
            'units': self.units,
            'kl_weight': self.kl_weight,
            'activation': activations.serialize(self.activation),
            'prior_sigma_1': self.prior_sigma_1,
            'prior_sigma_2': self.prior_sigma_2,
            'prior_pi': self.prior_pi_1,
        })
        return config

    def compute_output_shape(self, input_shape):
        """Output shape is ``(input_shape[0], units)``."""
        return input_shape[0], self.units

    def build(self, input_shape):
        """Create the trainable mean (``*_mu``) and ``*_rho`` weights."""
        self.kernel_mu = self.add_weight(name='kernel_mu',
                                         shape=(input_shape[1], self.units),
                                         initializer=tf.keras.initializers.RandomNormal(stddev=self.init_sigma),
                                         trainable=True)
        self.bias_mu = self.add_weight(name='bias_mu',
                                       shape=(self.units,),
                                       initializer=tf.keras.initializers.RandomNormal(stddev=self.init_sigma),
                                       trainable=True)
        # rho = 0 corresponds to an initial sigma of softplus(0) = log(2).
        self.kernel_rho = self.add_weight(name='kernel_rho',
                                          shape=(input_shape[1], self.units),
                                          initializer=initializers.constant(0.0),
                                          trainable=True)
        self.bias_rho = self.add_weight(name='bias_rho',
                                        shape=(self.units,),
                                        initializer=initializers.constant(0.0),
                                        trainable=True)
        super().build(input_shape)

    def call(self, inputs, **kwargs):  # forward pass
        """Sample kernel and bias, add the KL loss, and apply the dense map."""
        kernel_sigma = tf.math.softplus(self.kernel_rho)
#         kernel_sigma = 0.0
        kernel = self.kernel_mu + kernel_sigma * tf.random.normal(self.kernel_mu.shape) # mu + sigma*epsilon

        bias_sigma = tf.math.softplus(self.bias_rho)
        bias = self.bias_mu + bias_sigma * tf.random.normal(self.bias_mu.shape) # mu + sigma*epsilon

        self.add_loss(self.kl_loss(kernel, self.kernel_mu, kernel_sigma) +
                      self.kl_loss(bias, self.bias_mu, bias_sigma))

        return self.activation(K.dot(inputs, kernel) + bias)

    def kl_loss(self, w, mu, sigma):
        """Single-sample KL estimate ``kl_weight * sum(log q(w) - log p(w))``."""
        variational_dist = tfp.distributions.Normal(mu, sigma)
        return self.kl_weight * K.sum(variational_dist.log_prob(w) - self.log_prior_prob(w))

    def log_prior_prob(self, w):
        """Element-wise log density of the mixture prior at ``w``.

        Evaluated in log space, ``logsumexp_k(log pi_k + log N(w | 0, sigma_k))``,
        so that weights far out in the tails do not underflow to ``log(0)``, as
        ``log(pi_1 * prob_1 + pi_2 * prob_2)`` would. Components with a
        non-positive weight contribute nothing and are skipped.

        Raises
        ------
        ValueError
            If no component has a positive weight.
        """
        components = (
            (self.prior_pi_1, self.prior_sigma_1),
            (self.prior_pi_2, self.prior_sigma_2),
        )
        log_terms = [
            tfp.distributions.Normal(0.0, sigma).log_prob(w) + float(np.log(weight))
            for weight, sigma in components
            if weight > 0.0
        ]
        if not log_terms:
            raise ValueError("At least one prior mixture weight must be positive.")
        return tf.reduce_logsumexp(tf.stack(log_terms, axis=0), axis=0)

In [None]:
# Optimiser and training-loop settings.
learning_rate = 0.01
num_epochs = 100
batch_size = 80

# The KL weight is one over the (fractional) number of mini-batches per epoch.
batches_per_epoch = X_train.shape[0]/batch_size
kl_loss_weight = 1.0 / batches_per_epoch

print("kl_loss_weight=",kl_loss_weight)


In [None]:
# Prior settings passed to every DenseVariational layer.
prior_params = {
    'prior_sigma_1': 1.807,
    'prior_sigma_2': 1.0,
    'prior_pi': 1.1
}

# One variational hidden layer followed by a deterministic linear output layer.
# `Input` was never imported in this notebook (NameError); use the one exposed
# by the already-imported `tensorflow.keras.layers` module.
x_in = layers.Input(shape=(32,))
x = DenseVariational(32, kl_loss_weight, **prior_params, activation='relu')(x_in)
# x = DenseVariational(32, kl_loss_weight, **prior_params, activation='relu')(x)
# x = DenseVariational(32, kl_loss_weight, **prior_params)(x)
x = layers.Dense(units=32,)(x)
model = Model(x_in, x)
model.summary()

In [None]:
# # build Model
# # tf.compat.v1.disable_eager_execution()
# model_2 = tf.keras.Sequential([
#     tf.keras.layers.InputLayer(input_shape=(32,)),
#     tfp.layers.DenseVariational(units=32,
#                                 make_posterior_fn=posterior,
#                                 make_prior_fn=prior,
#                                 kl_weight=kl_loss_weight,
#                                 activation='relu'),
# #     tfp.layers.DenseVariational(units=32,
# #                                 make_posterior_fn=posterior,
# #                                 make_prior_fn=prior,
# #                                 kl_weight=kl_loss_weight,
# #                                 activation='relu'),
#     layers.Dense(units=32,)
# #      tfp.layers.DenseVariational(units=32,
# #                                 make_posterior_fn=posterior,
# #                                 make_prior_fn=prior,
# #                                 kl_weight=kl_loss_weight)
# ])
# model_2.summary()

In [None]:
def run_experiment(model, loss, X_train,y_train,X_test,y_test):
    """Compile and train ``model``, plot its learning curves, return the history.

    Parameters
    ----------
    model : keras Model
        Model to compile and fit (modified in place).
    loss : keras loss or callable
        Training loss passed to ``model.compile``.
    X_train, y_train : array-like
        Training inputs and targets.
    X_test, y_test : array-like
        Validation inputs and targets, passed as ``validation_data``.

    Returns
    -------
    History
        The object returned by ``model.fit``. It was previously discarded;
        callers that ignore the return value are unaffected.

    Notes
    -----
    Reads the notebook globals ``learning_rate``, ``num_epochs`` and
    ``batch_size``.
    NOTE(review): 'accuracy' is tracked although the targets appear to be
    continuous; an error metric may be more informative -- confirm.
    """
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss=loss,
        metrics=['accuracy'],
        run_eagerly=False
    )

    print("Start training the model...")
    history = model.fit(X_train,y_train, epochs=num_epochs,batch_size=batch_size,
                        validation_data=(X_test,y_test),verbose=0,
                        callbacks=[TqdmCallback(verbose=1)])
 #                         callbacks=[TQDMNotebookCallback(leave_inner=True,leave_outer=True)])
    print("Model training finished.")

    # One figure per tracked quantity: training curve and validation curve.
    for metric in ('accuracy', 'loss'):
        plt.plot(history.history[metric])
        plt.plot(history.history['val_' + metric])
        plt.title('model ' + metric)
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend(['train', 'validation'], loc='upper left')
        plt.show()

    return history





In [None]:
# Training loss; the alternatives tried earlier are kept below for reference.
loss = keras.losses.MeanAbsoluteError()
# loss = keras.losses.MeanSquaredError()
# loss = tf.keras.losses.LogCosh()


# def neg_log_likelihood(y_obs, y_pred, sigma=1.9):
#     dist = tfp.distributions.Normal(loc=y_pred, scale=sigma)
#     return K.sum(-dist.log_prob(y_obs))

# loss = neg_log_likelihood

# Number of complete mini-batches in each split.
n_train_batches = X_train.shape[0] // batch_size
n_test_batches = X_test.shape[0] // batch_size
print("Number of batches in:\ntrain_dataset=",n_train_batches,
      "\ntest dataset=",n_test_batches)

run_experiment(model, loss,X_train,y_train,X_test,y_test)

# Evaluate on the held-out split; evaluate() returns [loss, accuracy].
loss_1, accuracy_1 = model.evaluate(X_test, y_test)
print('Accuracy: %.2f'% (accuracy_1*100), 'Loss: %.2f' % (loss_1))

# run_experiment(model_2, loss,X_train,y_train,X_test,y_test)
# loss_2, accuracy_2 = model_2.evaluate(X_test, y_test)
# print('Accuracy: %.2f'% (accuracy_2*100), 'Loss: %.2f' % (loss_2))

In [None]:
# testing data
#ex =[1.8506733132414595, -0.5413392919167934, -0.08290738562338233, 0.21020703115540967, 0.45736532200026686, 0.4144505763882332, 0.7974591877817361, -1.0755342176198304, -1.2923531024863615, 0.7928643052728441, 1.8700508128542486, 0.9516103164912759, -0.4170465613156157, -0.5803716218887971, -1.4581616083955689, -0.40999934680233296, -1.6091129317275288, -0.4340613520847116, 0.11684358400103442, -0.1907166017062774, 1.2559540251043124, 0.15898709633936114, 0.21429222182377086, -0.409223823105235, 1.572301166661345, 0.4515499421024908, 0.4609354654439869, -0.15711651718793024, -0.056095371688984985, -0.8742907599873543, -0.0343957073741717, 0.4250477713080214]
# ex = np.array(ex).reshape(1,32)
# g = [49.86549443077032, -32.829898739627914, 31.066976142463147, -16.39544920290602, 23.644040843599672, -7.483481398329604, 19.671315362354513, -5.810537773537636, 15.51302201582224, -2.0950719617562044, 19.184476694104273, 0.8757732130798428, 14.160922938674743, -1.738831656590501, 13.75108490801291, -3.0704791699156475, 15.426648483802477, -5.096069402933892, 19.006163317993384, -4.736981704910806, 25.584616583763687, -8.337806692383278, 31.611048236397345, -17.583983790467204, 44.126115313888214, -25.66148544786747, 43.56402785659327, -28.53946231158809, 42.269855498042155, -29.71370011290176, 36.99478211467913, -21.796550338318383]
# g = np.array(g)

In [None]:
# Use the first EXP sample (real parts) as a single worked example:
# a copy of the input reshaped to a batch of one, and its target.
example_input = np.real(exp_x[0]).copy().reshape(1,32)
example_target = np.real(exp_y[0])

In [None]:
# Run the model `iterations` times on the example input and average the outputs.
# DenseVariational draws fresh weights on every call, so repeated passes give
# different predictions; with iterations = 1 the "mean" is a single draw.
# (The disabled model_2 comparison, the unused `pre`/`predicted_2` variables and
# the single-iteration `for idx in range(1)` loop were removed.)
iterations = 1

# Each call returns an array of shape (1, 32); stack them into (iterations, 32).
predicted_1 = np.concatenate(
    [model(example_input).numpy() for _ in range(iterations)], axis=0)
print(predicted_1.shape)

prediction1_mean = np.mean(predicted_1, axis=0).tolist()

print(
    f"Predictions mean: {np.array(prediction1_mean)}, "
)
print("actual target=",example_target)
print("difference in prediction and actual=",np.array(prediction1_mean)-np.array(example_target))