In [1]:
import numpy as np
import gymnasium as gym
from gymnasium.spaces import Dict, Discrete, Box
import torch

import os 
import sys

sys.path.append(os.path.abspath('..'))

from environment.env import ObservableDeformedGridworld

In [2]:
class POMDPDeformedGridworld(ObservableDeformedGridworld):
    def __init__(self):
        super(POMDPDeformedGridworld, self).__init__(render_mode='human')
        
        self.observation_space = Dict({
            'obs': Discrete(2),
            'pos': Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32)
        })
    
    def reset(self):
        state, _ = super().reset()
        pomdp_state = {
            'obs': torch.tensor([1], dtype=torch.float32) if super().is_in_obstacle(state['pos']) else torch.tensor([0], dtype=torch.float32),
            'pos': torch.tensor(state['pos'], dtype=torch.float32)
            }
        return pomdp_state, {}
    
    def step(self, action):
        state, reward, terminated, truncated, info = super().step(action)
        pomdp_state = {
            'obs': torch.tensor([1], dtype=torch.float32) if super().is_in_obstacle(state['pos']) else torch.tensor([0], dtype=torch.float32),
            'pos': torch.tensor(state['pos'], dtype=torch.float32)
            }
        return pomdp_state, reward, terminated,truncated, info
    
    def get_state(self):
        pomdp_state = {
            'obs': torch.tensor([1], dtype=torch.float32) if super().is_in_obstacle(self.state) else torch.tensor([0], dtype=torch.float32),
            'pos': torch.tensor(self.state, dtype=torch.float32)
            }
        return pomdp_state

In [14]:
pomdp_env = POMDPDeformedGridworld()
pomdp_env.reset()

({'obs': tensor([0.]), 'pos': tensor([0.0786, 0.6706])}, {})

In [11]:
# define Probability of observation o in position p and deformation theta
# P(o|p,theta) = p(\phi(o,p,theta))
# 
# but i only have a belief of the deformation theta

In [None]:
class POMDPAgent():
    def __init__(self, env: POMDPDeformedGridworld, obs_model):
        self.env = env
        self.obs_model = obs_model
        
        # assuming gaussian belief over 4 parameter deformation matrix
        self.belief = {
            'mean': torch.tensor([0.5, 0.0, 0.0, 0.5], dtype=torch.float32, requires_grad=True),
            'cov': torch.eye(4) * 0.1
        }
    
    def act(self):
        action = input('Enter action: ')
        pomdp_state, reward, terminated,truncated, info = self.env.step(int(action))
        print(pomdp_state)
        self.belief_update(pomdp_state)
    

    def render_act(self):
        """For testing belief convergence"""
        pomdp_state = self.env.get_state()
        # self.belief_update(pomdp_state)
        self.env.render()
        

    def belief_update(self, state):
        # assuming gaussian belief over 4 parameter deformation matrix
        # belief update using kalman filter
        mu = self.belief['mean']
        P = self.belief['cov']
        # predicted observation using current obs_model
        actual_obs = state['obs']
        print(state['pos'].unsqueeze(0),actual_obs.unsqueeze(0),mu.unsqueeze(0))
        rescale_obs = self.obs_model(state['pos'].unsqueeze(0),actual_obs.unsqueeze(0),mu.unsqueeze(0))
        p_o = torch.distributions.Bernoulli(rescale_obs)

        rescale_obs.backward()
        H = mu.grad.clone()
        # mu.grad.zero_()

        R = torch.eye(1) * 0.01

        mu_new, P_new = self.kalman_update(mu, P, actual_obs, H, R)
        # mu_new.grad.zero_()
        
        self.belief['mean'] = mu_new
        self.belief['cov'] = P_new
        

    def kalman_update(self, mu, P, z, H, R):
        """
        Perform the Kalman update step for the belief.
        - mu: Current belief mean
        - P: Current belief covariance
        - z: Observation (0 or 1)
        - H: Jacobian of the observation model
        - R: Observation noise covariance
        """
        # Compute the Kalman gain
        K = torch.matmul(P, H.T) * torch.inverse(H @ P @ H.T + R).squeeze(0)
        
        # Update the belief
        mu_new = mu + K * (z - H @ mu)
        P_new = P - K @ H * P
        
        mu_new.retain_grad()
        mu_new.grad.zero_()
        print('mu_new:', mu_new)
        return mu_new, P_new

In [62]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class NN(nn.Module):
    def __init__(self):
        super(NN, self).__init__()

        self.f1 = nn.Linear(7, 128)
        self.f2 = nn.Linear(128, 128)
        self.f3 = nn.Linear(128, 128)
        self.f4 = nn.Linear(128, 1)
        
    def forward(self, pos,deform_obs,theta):
        x = torch.cat([pos,deform_obs,theta], dim=1)
        x = F.relu(self.f1(x))
        x = F.relu(self.f2(x))
        x = F.relu(self.f3(x))
        x = F.sigmoid(self.f4(x))
        return x

# Instantiate the model
obs_model = NN()

# Load the model
obs_model.load_state_dict(torch.load('obs_model.pth'))

  obs_model.load_state_dict(torch.load('obs_model.pth'))


<All keys matched successfully>

In [63]:
obs_model.eval()
obs_model(torch.tensor([0.5, 0.5]).unsqueeze(0), torch.tensor([1.0]).unsqueeze(0), torch.tensor([0.5, 0.0, 0.0, 0.5]).unsqueeze(0))

tensor([[0.9985]], grad_fn=<SigmoidBackward0>)

In [64]:
agent = POMDPAgent(pomdp_env, obs_model)

In [66]:
while True:
    try:
        agent.render_act()
    except KeyboardInterrupt:
        break

error: display Surface quit

In [28]:
agent.belief


{'mean': tensor([ 0.5422, -0.0016,  0.0369,  0.2813], grad_fn=<AddBackward0>),
 'cov': tensor([[0.0423, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0423, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0423, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0423]])}

# Gaussian Belief Update

If b(M)b(M) is approximately Gaussian:
b(M)∼N(μ,Σ),
b(M)∼N(μ,Σ),

where μμ is the mean vector and ΣΣ is the covariance matrix of the parameters, the update can be performed analytically or approximately using a filtering approach.
Steps:

    Observation Model Approximation: Linearize O(o∣M)O(o∣M) around the current mean μμ using a first-order Taylor expansion:
    O(o∣M)≈O(o∣μ)+∇MO(o∣μ)⊤(M−μ),
    O(o∣M)≈O(o∣μ)+∇M​O(o∣μ)⊤(M−μ),

    where ∇MO(o∣μ)∇M​O(o∣μ) is the Jacobian of O(o∣M)O(o∣M) at μμ.

    Update Mean and Covariance: Using an Extended Kalman Filter (EKF) approach:
        Predict step: Keep μμ and ΣΣ unchanged before incorporating the observation.
        Update step:
        K=ΣH⊤(HΣH⊤+R)−1,
        K=ΣH⊤(HΣH⊤+R)−1,
        μ′=μ+K(o−h(μ)),
        μ′=μ+K(o−h(μ)),
        Σ′=Σ−KHΣ,
        Σ′=Σ−KHΣ, where:
            H=∇Mh(M)∣M=μH=∇M​h(M)∣M=μ​ is the Jacobian of the observation function h(M)h(M),
            RR is the observation noise covariance,
            KK is the Kalman gain.

    Update Belief: Replace (μ,Σ)(μ,Σ) with (μ′,Σ′)(μ′,Σ′).

For Vector Outputs (Multi-class or Multi-dimensional)

If h(M)h(M) outputs a vector instead of a scalar, compute the Jacobian as follows:

In [None]:
# Example for vector-valued outputs (e.g., multi-class observation model)
mu = torch.tensor([0.5, 0.5, 0.1, 0.1], requires_grad=True)
obs_pred = model(mu)  # Assume model now outputs a vector, e.g., shape (3,)
jacobian = torch.zeros(obs_pred.shape[0], mu.shape[0])  # Preallocate Jacobian (3x4)

# Compute Jacobian row by row
for i in range(obs_pred.shape[0]):
    model.zero_grad()  # Clear gradients
    obs_pred[i].backward(retain_graph=True)  # Backprop for the i-th output
    jacobian[i] = mu.grad  # Store the gradient
    mu.grad.zero_()  # Reset gradients for next iteration

print("Jacobian (multi-dimensional output):")
print(jacobian)


In [115]:
import torch
import torch.nn as nn

# Define the observation model (neural network)
class ObservationModel(nn.Module):
    def __init__(self):
        super(ObservationModel, self).__init__()
        self.fc1 = nn.Linear(4, 16)  # Input: 4D vector, hidden: 16 neurons
        self.fc2 = nn.Linear(16, 1)  # Output: scalar (0 or 1)
        self.activation = nn.Sigmoid()  # Sigmoid output (probability-like)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.activation(self.fc2(x))
        return x

# Kalman filter update step
def kalman_update(mu, P, z, H, R):
    """
    Perform the Kalman update step for the belief.
    - mu: Current belief mean
    - P: Current belief covariance
    - z: Observation (0 or 1)
    - H: Jacobian of the observation model
    - R: Observation noise covariance
    """
    # Compute the Kalman gain
    K = torch.matmul(P, H.T) * torch.inverse(H @ P @ H.T + R).squeeze(0)
    
    # Update the belief
    mu_new = mu + K * (z - H @ mu)
    P_new = P - K @ H * P
    
    return mu_new, P_new

# Initialize the neural network
model = ObservationModel()

# Current belief (Gaussian) parameters: mean (mu) and covariance (P)
mu = torch.tensor([0.5, 0.56, 0.1, 0.1], dtype=torch.float32, requires_grad=True)  # Example mean of the belief
P = torch.eye(4)  # Example covariance (identity for simplicity)

# The observation model is a neural network, so let's predict the observation
obs_pred = model(mu)  # Predicted observation for the current belief

# For simplicity, assume we have an observation (0 or 1)
z = torch.tensor([1.0])  # Example observed value

# Compute the Jacobian (derivative of the observation model w.r.t. mu)
obs_pred.backward()  # Compute the gradients of the prediction wrt mu
H = mu.grad.clone()   # The Jacobian is just the gradient of the output with respect to the input (mu)

# Reset gradients for the next iteration
mu.grad.zero_()

# Assume a simple observation noise model (R)
R = torch.tensor([[0.01]])  # Small observation noise

# Perform Kalman update
mu_new, P_new = kalman_update(mu, P, z, H, R)

print("Updated belief mean:", mu_new)
print("Updated belief covariance:", P_new)


Updated belief mean: tensor([-1.6752, -0.7079,  0.0612, -2.2841], grad_fn=<AddBackward0>)
Updated belief covariance: tensor([[0.8676, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.8676, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.8676, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.8676]])


In [93]:
# sample from a Gaussian distribution with mean mu and covariance P using pytorch
def sample_gaussian(mu, P):
    """
    Sample from a Gaussian distribution with mean mu and covariance P.
    
    Parameters:
        mu (torch.Tensor): Mean of the Gaussian (vector).
        P (torch.Tensor): Covariance matrix of the Gaussian.
    
    Returns:
        sample (torch.Tensor): Sampled value from the Gaussian.
    """
    # Generate a sample from a standard normal distribution
    sample = torch.randn_like(mu)
    
    # Perform Cholesky decomposition of the covariance matrix
    L = torch.linalg.cholesky(P)
    
    # Transform the sample to match the desired covariance
    sample = mu + L @ sample
    
    return sample

# Example usage
sample_gaussian(mu_new, P_new)

tensor([1.8000, 3.3302, 0.7704, 1.9053], grad_fn=<AddBackward0>)

In [86]:
# Compute the Kalman gain
print('shape P', P.shape)
print('shape H', H.shape)
print('shape R', R.shape)
K = torch.matmul(P, H.T) * torch.inverse(H @ P @ H.T + R).squeeze(0)

# Update the belief
mu_new = mu + K * (z - H @ mu)
P_new = P - K @ H * P



shape P torch.Size([4, 4])
shape H torch.Size([4])
shape R torch.Size([1, 1])


In [85]:
K@ H * P

tensor([[0.1259, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.1259, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.1259, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.1259]])

In [81]:
P_new = P - K @ H @ P

RuntimeError: both arguments to matmul need to be at least 1D, but they are 0D and 2D

tensor(-0.0114, grad_fn=<DotBackward0>)

In [70]:
torch.inverse(H @ P @ H.T + R).squeeze(0)

tensor([87.7421])

In [59]:
obs_pred = model(mu)  # Predicted observation for the current belief

# For simplicity, assume we have an observation (0 or 1)
z = torch.tensor([1.0])  # Example observed value

# Compute the Jacobian (derivative of the observation model w.r.t. mu)
obs_pred.backward()  # Compute the gradients of the prediction wrt mu
H = mu.grad.clone()  # The Jacobian is
H

tensor([-0.0451, -0.1558,  0.2192,  0.0721])

In [66]:
t = torch.tensor([[1.0, 2.0, 3.0, 4.0],[4,3,2,1]], dtype=torch.float32, requires_grad=True)
t
a = 3

t*a

tensor([[ 3.,  6.,  9., 12.],
        [12.,  9.,  6.,  3.]], grad_fn=<MulBackward0>)