## pRNN wavefunction with Constrained Sampling

We define a class $\textrm{RNNWavefunction(tf.keras.Model)}$ for sampling monomials of homogeneous degree in a given number M of variables $(x_1,x_2,...,x_M)$. 
The homogeneity constraint does not spoils the auto-regressive property of RNNs. It is implemented generalizing the scheme of $\textit{Appendix D}$ of $\textrm{[PhysRevResearch.2.023358]}$. 

In [104]:
# pRNN wavefunction ansazt with GRU layer + Dense softmax
# Samples homogeneous monomials in Nc^2 and fixed degree 
# Needs to be fixed by broadcasting

import tensorflow as tf
import numpy as np


class RNNWavefunction(tf.keras.Model):
    def __init__(self, system_size, units=20, input_dim=3, output_dim=3, seed=211):
        """
        system_size: int, number of timesteps or system size (= number of monomial variables)
        units: int, number of units in the GRU layer
        input_dim: int, number of input features (= monomial homogeneous degree = charge+1)
        output_dim: int, number of output features (= monomial homogeneous degree = charge+1)
        seed: int, the random seed for reproducibility
        """
        super(RNNWavefunction, self).__init__()

        # Set random seeds for reproducibility
        np.random.seed(seed)
        tf.random.set_seed(seed)

        self.system_size = system_size
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        # Define the GRU layer (one GRU layer with specified units)
        self.gru = tf.keras.layers.GRU(units=units, return_sequences=True, return_state=True)
        
        # Final Dense layer with Softmax output (probabilities, not logits!)
        self.dense = tf.keras.layers.Dense(output_dim, activation="softmax")
        
    def call(self, inputs, hidden_state=None, training=False):
        """
        Forward pass through the network with fixed hidden state.
        """
        print("Now input is: "+str(inputs))
        if hidden_state is None:
            hidden_state = tf.zeros((inputs.shape[0], self.gru.units))  # Fixed hidden state

        x, hidden_state = self.gru(inputs, initial_state=hidden_state, training=training)  # GRU layer
        x = self.dense(x)  # Apply Dense layer

        print("result of softmax = "+str(x))
        return x, hidden_state
    
    def sample(self, numsamples):

        """
        Generate samples from the probability distribution parameterized by the RNN.
        numsamples: int, number of samples to generate
        """
        samples = []  # List to store the generated sequence
        inputs = tf.zeros((numsamples, 1, self.input_dim), dtype=tf.float32)  # Initial input (zero vector)
        hidden_state = None  # No initial hidden state

        for t in range(self.system_size-1): # Sampling cycle over system_size =  number of variables
            output, hidden_state = self.call(inputs, hidden_state=hidden_state)  # Forward pass through the model
            #print("At step t="+str(t)+" the output is: "+str(output))
            # Get probabilities for the last generated timestep
            softout= output[:, -1, :]  # Shape: [numsamples, output_dim]
            
            # Projection of softmax probabilities imposing charge conservation
            # At each step applied a mask thetavec that evaluates to zero over all the samples that exceed the total charge
            softout_t = np.copy(softout)
            thetavec = [np.heaviside(self.input_dim-1-np.sum(np.array(samples),axis=0)-i,1) for i in range(softout_t.shape[1])] 
            softout_t = np.array([softout_t[:,i]*thetavec[i] for i in range(softout_t.shape[1])])
            norm = np.sum(softout_t,axis=0) 
            softout_t = softout_t/norm # Re-normalize the masked softmax output

            softout_t = np.transpose(softout_t)
            print("At step t="+str(t)+" the softmax probs are: "+str(softout_t))
            # Sample from categorical distribution
        
            sampled_t = tf.random.categorical(tf.math.log(softout_t), num_samples=1)  # Shape: [numsamples, 1]
            sampled_t = tf.squeeze(sampled_t, axis=-1)  # Shape: [numsamples]

            print("The new sampled site is "+str(sampled_t)+" at time step ="+str(t))
            # Append sampled values to the list
            samples.append(sampled_t)
        
            print("At time step "+str(t)+" the sample is ="+str( tf.stack(samples, axis=1)))
            # Convert sampled values to one-hot encoding for the next input
            inputs = tf.one_hot(sampled_t, depth=self.output_dim, dtype=tf.float32)
            #print("Ready to expand the new input "+str(inputs)+" at time step ="+str(t))
            inputs = tf.expand_dims(inputs, axis=1)  # Add time-step dimension
            
        samples = tf.stack(samples, axis=1)

        # Add last system site to the sample, fixed by charge conservation
        J = tf.constant((self.input_dim-1)*np.ones(samples.shape[0])-np.sum(samples,axis=1))
        J = np.transpose(tf.cast(tf.expand_dims(J, axis=0), dtype = tf.int64))
        #print(samples)
        #print(J)
        samples = tf.concat([samples, J], axis=1)

        return samples

    def log_probability(self, samples):
        """
        Calculate log-probabilities of the given samples.
        samples: Tensor, shape (numsamples, system_size), the sampled wavefunction
        """
        # Convert samples to one-hot encoding

        

        one_hot_samples = tf.one_hot(samples, depth=self.output_dim, dtype=tf.float32)

        inputs = one_hot_samples  # Shape: [numsamples, system_size, output_dim]
    
        # Ensure evaluation mode (training=False)
        probs, _ = self.call(inputs, training=False)  # Forward pass through the model with training=False
        print(probs)
        
        # Compute log probabilities (log(p(x)))
        log_probs = tf.reduce_sum(tf.math.log(tf.reduce_sum(tf.multiply(probs, one_hot_samples), axis=-1)), axis=-1)

        return log_probs

In [107]:
# Parameters
Nc = 2 
system_size = Nc*Nc # Number of timesteps = # of variables M
charge = 1 # Polynomial Degree
input_dim = charge+1  # Number of input features = maximal number of excitations = exponents (includes 0)
output_dim = charge+1   # Number of output classes = maximal number of excitations = exponents (includes 0)

# GRU units
units = 10
numsamples = 1  # Number of samples to generate

# Instantiate the RNNWavefunction model
model = RNNWavefunction(system_size, units, input_dim, output_dim, seed=np.random.randint(1,200))

# Example: Sampling
samples = model.sample(numsamples)
print(f"Generated Samples:\n{samples.numpy()[:10]}")

# Example: Evaluation
log_probs = model.log_probability(samples)
print(f"Generated Samples:\n{log_probs.numpy()[:10]}")

Now input is: tf.Tensor([[[0. 0.]]], shape=(1, 1, 2), dtype=float32)
result of softmax = tf.Tensor([[[0.5 0.5]]], shape=(1, 1, 2), dtype=float32)
At step t=0 the softmax probs are: [[0.5 0.5]]
The new sampled site is tf.Tensor([1], shape=(1,), dtype=int64) at time step =0
At time step 0 the sample is =tf.Tensor([[1]], shape=(1, 1), dtype=int64)
Now input is: tf.Tensor([[[0. 1.]]], shape=(1, 1, 2), dtype=float32)
result of softmax = tf.Tensor([[[0.5265186  0.47348142]]], shape=(1, 1, 2), dtype=float32)
At step t=1 the softmax probs are: [[1. 0.]]
The new sampled site is tf.Tensor([0], shape=(1,), dtype=int64) at time step =1
At time step 1 the sample is =tf.Tensor([[1 0]], shape=(1, 2), dtype=int64)
Now input is: tf.Tensor([[[1. 0.]]], shape=(1, 1, 2), dtype=float32)
result of softmax = tf.Tensor([[[0.46289626 0.5371037 ]]], shape=(1, 1, 2), dtype=float32)
At step t=2 the softmax probs are: [[1. 0.]]
The new sampled site is tf.Tensor([0], shape=(1,), dtype=int64) at time step =2
At time