In [1]:
import numpy as np
import gymnasium as gym
from gymnasium.spaces import Dict, Discrete, Box
import torch

import os 
import sys

sys.path.append(os.path.abspath('../..'))

from environment.env import POMDPDeformedGridworld

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class NN(nn.Module):
    def __init__(self):
        super(NN, self).__init__()

        self.f1 = nn.Linear(6, 128)
        self.f2 = nn.Linear(128, 128)
        self.f3 = nn.Linear(128, 128)
        self.f4 = nn.Linear(128, 1)
        
    def forward(self, pos,theta):
        x = torch.cat([pos,theta], dim=1)
        x = F.relu(self.f1(x))
        x = F.relu(self.f2(x))
        x = F.relu(self.f3(x))
        x = F.sigmoid(self.f4(x))
        return x

# Instantiate the model
obs_model = NN()

# Load the model
obs_model.load_state_dict(torch.load('obs_model_4.pth',map_location=torch.device('cpu') ,weights_only=True))

obs_model.eval()
obs_model(torch.tensor([0.5, 0.5]).unsqueeze(0), torch.tensor([0.5, 0.0, 0.0, 0.5]).unsqueeze(0))

tensor([[0.2173]], grad_fn=<SigmoidBackward0>)

# DISCRETE BELIEF UPDATE TESTING

In [4]:
class POMDPAgent():
    
    def __init__(self, env: POMDPDeformedGridworld, discretization=10, update='discrete_exact', obs_model=None):
        self.env = env

        if update == 'discrete_modelled' or update == 'discrete_exact':
            stretch = np.linspace(.4, 1, discretization)
            # shear = np.linspace(0,0, discretization)
            xa,xb = np.meshgrid(stretch, stretch) # , shear, shear
            positions = np.column_stack([xa.ravel(),xb.ravel()]), #  ya.ravel(),yb.ravel()
            positions = torch.tensor(positions, dtype=torch.float32)
            self.belief_points = positions.squeeze()
            self.belief_values = torch.ones(self.belief_points.shape[0], dtype=torch.float32) / len(positions)

        if update == 'discrete_modelled': 
            assert obs_model is not None, f'Need an observation model for discrete_modelled belief update, given {obs_model}'
            self.obs_model = obs_model
            self.belief_update = self.discrete_belief_update
        elif update == 'discrete_exact':
            self.belief_update = self.exact_belief_update
        elif update == 'variational':
            from utils.belief import BetaVariationalBayesianInference
            assert obs_model is not None, f'Need an observation model for variational belief update, given {obs_model}'
            self.VI = BetaVariationalBayesianInference(obs_model, input_dim=2, latent_dim=4)

            self.belief_update = self.variational_belief_update
            self.X_history = [self.env.get_state()['pos']]
            self.y_history = [self.env.get_state()['obs']]
        else:
            raise ValueError('Invalid belief update method')
        
        self.original_def = env.transformation_matrix[0][0], env.transformation_matrix[1][1]
        
    def act(self):
        # action = 
        pomdp_state, reward, terminated,truncated, info = self.env.step(int(action))
        print(pomdp_state)
        self.belief_update(pomdp_state)

    def discrete_belief_update(self, pomdp_state):
        """discrete belief update"""
        pos = pomdp_state['pos']
        obs = pomdp_state['obs']

        batch_pos = pos.repeat(len(self.belief_points), 1)
        
        # need theta because working on two parameters only in this example
        theta = torch.cat([self.belief_points, torch.zeros(len(self.belief_points), 2)], dim=1)
        # permute theta to match the order of pos
        theta = theta[:, [0,3,2,1]]
        

        predictions = self.obs_model(batch_pos,theta)
        likelihood = torch.exp(torch.distributions.Bernoulli(predictions).log_prob(obs))

        tmp = likelihood.squeeze() * self.belief_values
        self.belief_values = tmp  / tmp.sum()
    
    def exact_belief_update(self, pomdp_state):
        """discrete belief update"""
        obs = pomdp_state['obs']
        pos = pomdp_state['pos']

        def f():
            likelihood = []
            for x in self.belief_points:
                try:
                    self.env.set_deformation([x[0], x[1]],[0,0]) # stretch, shear format
                    likelihood.append(torch.all(torch.tensor(self.env.observe(list(pos))) == obs))
                except:
                    raise ValueError('Invalid belief point x', x)
            self.env.set_deformation(self.original_def, [0,0])
            return torch.tensor(likelihood, dtype=torch.float32)

        
        likelihood = f()
        self.belief_values =  likelihood * self.belief_values
        self.belief_values = self.belief_values / self.belief_values.sum()

    def variational_belief_update(self, pomdp_state):
        self.X_history.append(pomdp_state['pos'])
        self.y_history.append(pomdp_state['obs'])

        # X = posizione dell'agente (x,y)
        X = torch.stack(self.X_history)

        # ossevrazioni dell'agente negli stati pos=(x,y)
        y = torch.stack(self.y_history)

        # Create and fit the model
        self.VI.fit(X, y, n_epochs=10, lr=0.05)

    def render_act(self):
        """For testing belief convergence"""
        self.env.render()       
        pomdp_state = self.env.get_state()
        if torch.any(pomdp_state['pos'] != self.X_history[-1]):
            self.belief_update(pomdp_state)

In [5]:
from PIL import Image
import torch
from io import BytesIO

# Modify belief_plot to save as an image
def belief_plot(agent):
    import matplotlib.pyplot as plt
    plt.figure()    
    # (Add your plotting logic here)
    plt.imshow(agent.belief_values.detach().numpy().reshape(50,50))
    # Save the figure to an in-memory buffer
    buf = BytesIO()
    plt.savefig(buf, format="png")  # Save as PNG into the buffer
    plt.close()  # Close the plot to avoid memory leaks
    buf.seek(0)  # Move to the beginning of the buffer

    # Open the image from the buffer
    return Image.open(buf)    

def create_gif(images, filename="belief_animation.gif", duration=100, loop=0):
    """
    Create a GIF from a list of PIL Image objects.
    
    Args:
        images (list): A list of PIL Image objects.
        filename (str): Name of the output GIF file.
        duration (int): Duration of each frame in milliseconds.
        loop (int): Number of times to loop the GIF. 0 means infinite.
    """
    if images:
        images[0].save(
            filename,
            save_all=True,
            append_images=images[1:],
            duration=duration,
            loop=loop
        )
        print(f"GIF saved as {filename}")
    else:
        print("No images to create a GIF.")

In [6]:
pomdp_env = POMDPDeformedGridworld(obs_type='single')
pomdp_env.reset()
pomdp_env.set_deformation([0.6, 0.6],[0,0])

agent = POMDPAgent(pomdp_env,50, update='discrete_modelled', obs_model=obs_model)
# agent = POMDPAgent(pomdp_env,50,update='discrete_exact')

images = []
b = agent.belief_points[torch.argmax(agent.belief_values)]
img = belief_plot(agent)
images.append(img)      

print(b)
while True:
    try:
        agent.render_act()
        img = belief_plot(agent)
        images.append(img)      
        # assert torch.allclose(agent.belief_values.sum(), torch.tensor([1.0])), f"Belief values do not sum to 1: {agent.belief_values.sum()}"
        if torch.any(b != agent.belief_points[torch.argmax(agent.belief_values)]):
            b = agent.belief_points[torch.argmax(agent.belief_values)]
            print(b)
    except:
        print('Error')
        break



  positions = torch.tensor(positions, dtype=torch.float32)


tensor([0.4000, 0.4000])
Error


In [15]:
create_gif(images, filename="belief_animation_single_obsmodel.gif", duration=100, loop=0)

GIF saved as belief_animation_single_obsmodel.gif


# VARIATIONAL UPDATE

In [7]:
import torch
import torch.nn as nn
import torch.distributions as dist
from torch.optim import Adam
import numpy as np

class BetaVariationalBayesianInference:
    def __init__(self, f, input_dim, latent_dim=1, hidden_dim=32):
        """
        Initialize the variational Bayesian inference model with Beta distributions.
        
        Args:
            f: callable, the known function linking X and y through theta
            input_dim: int, dimension of input X
            latent_dim: int, dimension of latent parameter theta
            hidden_dim: int, dimension of hidden layers in the neural network
        """
        self.f = f
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        
        # Variational parameters (alpha and beta of q(theta))
        # Initialize to reasonable values (alpha=beta=2 gives a symmetric Beta)
        self.q_alpha = nn.Parameter(2 * torch.ones(latent_dim))
        self.q_beta = nn.Parameter(2 * torch.ones(latent_dim))
        
        # Prior parameters (can be customized)
        self.prior_alpha = torch.ones(latent_dim)  # Default to Beta(1,1) = Uniform(0,1)
        self.prior_beta = torch.ones(latent_dim)
        
        self.low = torch.tensor([.4, -.2, -.2, .4])
        self.high = torch.tensor([1.0, .2, .2, 1.0])

    def sample_latent(self, n_samples=1):
        """
        Sample from the variational distribution q(theta) using the Beta distribution
        """
        q_dist = dist.Beta(self.q_alpha, self.q_beta)
        theta =  q_dist.rsample((n_samples,))
        return self.low + (self.high - self.low) * theta
    
    def elbo(self, X, y, n_samples=10):
        """
        Compute the evidence lower bound (ELBO) with Beta distributions
        
        Args:
            X: torch.Tensor, input data (batch_size, input_dim)
            y: torch.Tensor, observations (batch_size,)
            n_samples: int, number of Monte Carlo samples
        """
        batch_size = X.shape[0]
        
        # Sample from variational distribution
        theta_samples = self.sample_latent(n_samples)  # (n_samples, latent_dim)
        
        # Compute log likelihood for each sample
        log_likelihood = torch.zeros(n_samples, batch_size)
        for i in range(n_samples):
            theta = theta_samples[i]
            y_pred = self.f(X, theta.expand(batch_size, -1)).squeeze()
            log_likelihood[i] = dist.Bernoulli(y_pred).log_prob(y)
        
        # Average over samples
        expected_log_likelihood = torch.mean(log_likelihood, dim=0).sum()
        
        # Compute KL divergence between Beta distributions
        q_dist = dist.Beta(self.q_alpha, self.q_beta)
        prior_dist = dist.Beta(self.prior_alpha, self.prior_beta)
        kl_div = dist.kl_divergence(q_dist, prior_dist).sum()
        
        return expected_log_likelihood - kl_div
    
    def fit(self, X, y, n_epochs=100, batch_size=64, lr=0.1):
        """
        Fit the model using variational inference
        
        Args:
            X: torch.Tensor, input data
            y: torch.Tensor, observations
            n_epochs: int, number of training epochs
            batch_size: int, batch size for stochastic optimization
            lr: float, learning rate
        """
        optimizer = Adam([self.q_alpha, self.q_beta], lr=lr)
        
        dataset = torch.utils.data.TensorDataset(X, y)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False)
        
        for epoch in range(n_epochs):
            epoch_loss = 0
            for batch_X, batch_y in dataloader:
                optimizer.zero_grad()
                loss = -self.elbo(batch_X, batch_y, n_samples=100)  # Negative because we minimize
                loss.backward()
                optimizer.step()
                
                # Ensure parameters stay positive
                with torch.no_grad():
                    self.q_alpha.data.clamp_(min=1e-6)
                    self.q_beta.data.clamp_(min=1e-6)
                
                epoch_loss += loss.item()
            
            if (epoch + 1) % 10 == 0:
                mean = self.q_alpha.detach() / (self.q_alpha.detach() + self.q_beta.detach())
                mean = self.low + (self.high - self.low) * mean
                print(f"Epoch {epoch+1}/{n_epochs}, Loss: {epoch_loss/len(dataloader):.4f}")
                print(f"Estimated theta mean: {mean}")
    
    def get_posterior_params(self):
        """Return the learned posterior parameters"""
        return {
            'alpha': self.q_alpha.detach(),
            'beta': self.q_beta.detach(),
            'mean': self.low + (self.high - self.low) * (self.q_alpha / (self.q_alpha + self.q_beta)).detach(),
            'mode': ((self.q_alpha - 1) / (self.q_alpha + self.q_beta - 2)).detach()
        }

In [7]:
pomdp_env = POMDPDeformedGridworld(obs_type='single')
pomdp_env.reset()
print(pomdp_env.transformation_matrix)

agent = POMDPAgent(pomdp_env,50, update='variational', obs_model=obs_model)

while True:
    try:
        agent.render_act()
    except:
        print('Error')
        break



[[0.44897147790195435, 0.1206244380337072], [0.18191284832518556, 0.45778315464858027]]
Epoch 10/10, Loss: 4.9005
Estimated theta mean: tensor([ 0.6263, -0.0488,  0.0423,  0.7618])
Epoch 10/10, Loss: 2.3943
Estimated theta mean: tensor([ 0.5534, -0.0953,  0.0815,  0.8185])
Epoch 10/10, Loss: 2.1555
Estimated theta mean: tensor([ 0.5076, -0.0999,  0.1049,  0.8586])
Epoch 10/10, Loss: 17.2414
Estimated theta mean: tensor([ 0.5498, -0.0683,  0.0821,  0.8346])
Epoch 10/10, Loss: 26.6275
Estimated theta mean: tensor([ 0.5797, -0.0340,  0.0521,  0.7896])
Epoch 10/10, Loss: 31.6377
Estimated theta mean: tensor([ 0.5688, -0.0189,  0.0123,  0.7305])
Epoch 10/10, Loss: 35.5514
Estimated theta mean: tensor([ 0.5633, -0.0077, -0.0244,  0.7030])
Epoch 10/10, Loss: 35.2344
Estimated theta mean: tensor([ 0.5395,  0.0016, -0.0367,  0.7080])
Epoch 10/10, Loss: 33.1707
Estimated theta mean: tensor([ 0.5342,  0.0339, -0.0503,  0.7231])
Epoch 10/10, Loss: 35.7730
Estimated theta mean: tensor([ 0.5032,  0.

In [12]:
pomdp_env.transformation_matrix

[[0.9547648571554864, -0.09006852783503004],
 [0.17217612734153898, 0.6915637745069985]]