In [32]:
import torch
import torch.nn as nn

# Define a simple neural network for f(x)
class SimpleNN(nn.Module):
    """Two-layer perceptron that maps an input vector to a single scalar f(x).

    Args:
        input_dim: Size of the last dimension of the input.
        hidden_dim: Width of the single hidden layer (default 64).
    """

    def __init__(self, input_dim, hidden_dim=64):
        super().__init__()
        # Layers are created in the order fc1, fc2 so seeded initialisation is unchanged.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, 1)  # scalar output head

    def forward(self, x):
        """Return fc2(relu(fc1(x))); the last dimension of the result has size 1."""
        hidden = self.fc1(x).relu()
        return self.fc2(hidden)
    

class MatrixMultiplier(nn.Module):
    """Toy model f(x) = sum(x @ W) with a learnable square matrix W.

    Args:
        input_dim: Number of rows and columns of W.
        param_value: Initial value of every entry of W (int or float).
    """

    def __init__(self, input_dim, param_value=1.0):
        super(MatrixMultiplier, self).__init__()

        # Initialize a weight matrix with all elements set to param_value.
        # The dtype is forced to the default floating type: torch.full infers an
        # integer dtype from an integer fill value, and nn.Parameter cannot
        # require gradients on an integer tensor.
        self.weight = nn.Parameter(
            torch.full(
                (input_dim, input_dim),
                param_value,
                dtype=torch.get_default_dtype(),
            )
        )

        # No bias for simplicity, can be added if necessary
        self.bias = None

    def forward(self, x):
        """Return the sum of all entries of x @ W as a 0-d tensor."""
        return torch.matmul(x, self.weight).sum()

In [33]:
def compute_gradient(model, x):
    """Return the gradient of the model output with respect to the input x.

    The gradient is taken on a detached copy of ``x``, so the caller's tensor is
    never modified. ``torch.autograd.grad`` is used instead of ``backward()`` so
    that no gradients are accumulated into the ``.grad`` fields of the model's
    parameters as a side effect.

    Args:
        model: Callable mapping a tensor to a tensor (e.g. an ``nn.Module``).
        x: Input tensor at which the gradient is evaluated.

    Returns:
        A tensor with the same shape as ``x`` holding d(sum of model(x)) / dx,
        or ``None`` when ``x`` is not used in the graph of the output. For a
        single-element output the sum is the output itself.
    """
    # Fresh leaf tensor: detached from any previous graph, with its own storage.
    x = x.detach().clone().requires_grad_(True)

    y = model(x)

    # Summing is a no-op for single-element outputs and lets multi-element
    # outputs (e.g. a batch of scalars) be differentiated as well.
    (grad_x,) = torch.autograd.grad(y.sum(), x, allow_unused=True)
    return grad_x

import torch

def compute_jacobian(model, x):
    """Compute the Jacobian matrix of ``model`` with respect to ``x``.

    The output of the model and the input are both flattened, so entry
    ``[i, j]`` of the result is d model(x).flatten()[i] / d x.flatten()[j].

    Args:
        model: Callable mapping a tensor to a tensor (e.g. an ``nn.Module``).
        x: Input tensor at which the Jacobian is evaluated. It is not modified.

    Returns:
        A tensor of shape ``(number of output elements, x.nelement())``.
        Rows for outputs that do not depend on ``x`` are left at zero.
    """
    # Fresh leaf tensor: detached from any previous graph, with its own storage.
    x = x.detach().clone().requires_grad_(True)

    # Flatten the output so 0-d (scalar) and multi-dimensional outputs are
    # handled the same way as a 1-D output vector.
    y = model(x).reshape(-1)
    n_outputs = y.nelement()

    # One row per OUTPUT element, one column per INPUT element.
    jacobian = torch.zeros(n_outputs, x.nelement(), dtype=x.dtype, device=x.device)

    for row in range(n_outputs):
        # autograd.grad returns the gradient directly, so nothing accumulates
        # in x.grad or in the model parameters between iterations.
        # retain_graph keeps the forward graph alive for the remaining rows.
        (grad_row,) = torch.autograd.grad(
            y[row], x, retain_graph=True, allow_unused=True
        )
        if grad_row is not None:
            jacobian[row] = grad_row.reshape(-1)

    return jacobian

In [34]:
# Compute d_tilde(x) based on the formula
def compute_d_tilde(model, x):
    """Return -(2x - 1) * grad_x model(x), computed elementwise.

    For a binary x this is the first-order (Taylor) estimate of how the model
    output changes when each single entry of x is flipped.
    """
    input_grad = compute_gradient(model, x)
    sign = (2. * x) - 1.  # +1 where x == 1, -1 where x == 0
    return -sign * input_grad

# Flip the i-th bit of x
def flip_bit(x, i):
    """Return a copy of x whose i-th entry is replaced by 1 - x[i]; x is left untouched."""
    flipped = x.clone()
    flipped[i] = 1 - x[i]
    return flipped

In [35]:
# Main Execution
input_dim = 10  # Example for D=10 dimensional binary data
model = MatrixMultiplier(input_dim,param_value=20.)

# Example input
x = torch.rand(input_dim)
x = (x > 0.5).float()  # Convert to binary
x

tensor([1., 1., 1., 1., 0., 1., 0., 1., 0., 0.])

In [36]:
# Gradient of the model output with respect to x.
compute_gradient(model,x)

tensor([200., 200., 200., 200., 200., 200., 200., 200., 200., 200.])

In [37]:
# Gradient-based estimate of the change in model(x) for each single-bit flip.
d_tilde = compute_d_tilde(model, x)

In [38]:
# Example for i-th bit flipped
i = 3  # 3rd bit as an example
x_flipped = flip_bit(x, i)
x_flipped

tensor([1., 1., 1., 0., 0., 1., 0., 1., 0., 0.])

In [43]:
# Exact effect of every single-bit flip: model(flip_bit(x, i)) - model(x) for each i.
# The loop variables i, x_flipped and f_difference stay defined after the loop
# (holding the values from the last iteration); a later cell prints f_difference.
all_differences = []
for i in range(input_dim):
    x_flipped = flip_bit(x,i)
    f_difference = model(x_flipped) - model(x)
    all_differences.append(f_difference.item())
all_differences

[-200.0, -200.0, -200.0, -200.0, 200.0, -200.0, 200.0, -200.0, 200.0, 200.0]

In [40]:
# Gradient-based estimate of model(flip_bit(x, i)) - model(x), one entry per index i.
d_tilde = compute_d_tilde(model, x)
d_tilde

tensor([-200., -200., -200., -200.,  200., -200.,  200., -200.,  200.,  200.])

In [41]:
# NOTE(review): f_difference is a single value (whatever was assigned to it last),
# while d_tilde has one entry per bit, so only one entry of d_tilde is comparable here.
print("d_tilde:", d_tilde)
print("f(x_flipped) - f(x):", f_difference.item())

d_tilde: tensor([-200., -200., -200., -200.,  200., -200.,  200., -200.,  200.,  200.])
f(x_flipped) - f(x): -200.0


In [10]:
# Gradient of the model output with respect to x.
# (A dangling `model.` attribute access, which is a SyntaxError, was removed from this cell.)
compute_gradient(model, x)

In [None]:
import torch
import torch.nn.functional as F

# Define the Deep EBM (from our previous discussion)
class DeepEBM(nn.Module):
    """Small fully connected network that maps an input vector to one scalar f(x).

    Args:
        input_dim: Size of the last dimension of the input.
        hidden_dim: Width of the single hidden layer (default 64).
    """

    def __init__(self, input_dim, hidden_dim=64):
        super().__init__()
        # Layers are created in the order fc1, fc2 so seeded initialisation is unchanged.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return fc2(relu(fc1(x))); the last dimension of the result has size 1."""
        hidden = F.relu(self.fc1(x))
        score = self.fc2(hidden)
        return score

def compute_d_tilde(x, f):
    """Return -(2x - 1) * grad_x f(x), computed elementwise.

    For a binary x this is the first-order (Taylor) estimate of
    f(x with entry i flipped) - f(x) for every index i.

    Args:
        x: Input tensor. It may or may not require grad; it is never modified.
        f: Callable mapping a tensor to a tensor; its output is summed before
            differentiation.

    Returns:
        A tensor with the same shape as ``x``. When ``x`` already requires grad
        the result stays attached to the autograd graph (``create_graph=True``,
        as before). When ``x`` does not require grad, the gradient is taken on
        an internal copy and a detached tensor is returned.
    """
    keep_graph = x.requires_grad
    if not keep_graph:
        # torch.autograd.grad raises for inputs that do not require grad, so
        # differentiate with respect to a private leaf copy instead.
        x = x.detach().clone().requires_grad_(True)

    gradient = torch.autograd.grad(f(x).sum(), x, create_graph=keep_graph)[0]
    d_tilde = -(2*x - 1) * gradient

    # A caller that passed a plain tensor cannot use a graph through the private copy.
    return d_tilde if keep_graph else d_tilde.detach()

def gibbs_with_gradients(f, x_current):
    """Run one Metropolis-Hastings step that proposes flipping a single entry.

    The index to flip is drawn from softmax(d_tilde(x) / 2), and the flip is
    accepted with probability
    min(exp(f(x') - f(x)) * q(i | x') / q(i | x), 1).

    Args:
        f: Callable mapping a tensor to a single-element tensor.
        x_current: Current sample. It is indexed along dim 0 and the softmax is
            taken over dim 0, so a 1-D tensor of 0/1 values is expected. It may
            or may not require grad; it is never modified.

    Returns:
        The proposed sample (a new, detached tensor) if the move is accepted,
        otherwise ``x_current`` itself.
    """
    # Plain view of the current state, without autograd history.
    x_plain = x_current.detach()

    # compute_d_tilde differentiates with respect to its input, so hand it a
    # private leaf copy that requires grad and drop the graph afterwards.
    d = compute_d_tilde(x_plain.clone().requires_grad_(True), f).detach()

    # Proposal distribution over which index to flip.
    probs = F.softmax(d / 2, dim=0)
    i = torch.multinomial(probs, 1).item()

    # Create the new sample by flipping the i-th bit
    x_prime = x_plain.clone()
    x_prime[i] = 1 - x_plain[i]

    # Reverse proposal: probability of picking the same index from x_prime.
    d_prime = compute_d_tilde(x_prime.clone().requires_grad_(True), f).detach()
    probs_prime = F.softmax(d_prime / 2, dim=0)

    # The acceptance test only needs values, so no graph is built for it.
    with torch.no_grad():
        ratio = torch.exp(f(x_prime) - f(x_plain)) * (probs_prime[i] / probs[i])
    accept_prob = ratio.clamp(max=1)

    if torch.rand(1) < accept_prob:
        return x_prime
    return x_current

In [None]:
# Example usage
input_dim = 10
ebm = DeepEBM(input_dim)
ebm = ebm.train()
x_sample = torch.randint(2, (input_dim,), dtype=torch.float32)  # initialize a random binary vector

In [None]:
# Display the module's repr.
ebm

In [None]:
# Take a single gibbs_with_gradients step starting from x_sample and print the
# state before and after the step.
new_sample = gibbs_with_gradients(ebm, x_sample)

print("Initial Sample:", x_sample)
print("New Sample after Gibbs with Gradients:", new_sample)