# create observation model


poiché il Pombp ha bisogno di un modello probabilistico per le osservazioni, nello scenario reale è necessario costruirlo.

L'idea è quella di allenare una rete a mappare le osservazioni in un maze deformato in osservazioni del maze originale. In questo modo




In [1]:
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque
import itertools
from tqdm import tqdm, trange

import os 
import sys

sys.path.append(os.path.abspath('..'))

from environment.env import MDPGYMGridEnvDeform, create_maze
import matplotlib.pyplot as plt



In [2]:
# maze size
N = 2

# thetas deformations (range(a,b),range(c,d))
# The two deformation components range over range(l0, h0) and range(l1, h1).
l0 = 1
h0 = 5
l1 = 1
h1 = 5

# Load the maze layout from disk and build the deformable grid environment on it.
maze = np.load("../maze/maze_2.npy")
env = MDPGYMGridEnvDeform(maze,l0,h0,l1,h1)

env.set_rendering()
env.render()

# Enumerate every ((x, y, phi), (i, j)) combination: x and y skip the first and
# last index of env.max_shape, phi takes 4 values, (i, j) covers all deformations.
states = [((x,y,phi),(i,j)) for x in range(1,env.max_shape[0]-1) for y in range(1,env.max_shape[1]-1) for phi in range(4) for i in range(l0,h0) for j in range(l1,h1)] 
actions = [0,1,2,3]
# All 2**5 binary observation vectors of length 5.
obs = list(itertools.product([0,1], repeat=5))
thetas = [(i,j) for i in range(l0,h0) for j in range(l1,h1)]

# Lookup tables from a state / observation tuple to its position in the lists above.
state_dict = {state: i for i, state in enumerate(states)}
obs_dict = {obs : i for i, obs in enumerate(obs)}

# Sizes of the state, action and observation spaces.
# NOTE(review): an earlier comment here listed three actions (listen / open-left /
# open-right), which does not match the four action ids defined above — confirm
# the meaning of actions 0-3 against MDPGYMGridEnvDeform.
lenS = len(states)
lenA = len(actions)
lenO = len(obs)

print(f"States: {lenS}, Actions: {lenA}, Observations {lenO}, Thetas {thetas}\n")



States: 9216, Actions: 4, Observations 32, Thetas [(1, 1), (1, 2), (1, 3), (1, 4), (2, 1), (2, 2), (2, 3), (2, 4), (3, 1), (3, 2), (3, 3), (3, 4), (4, 1), (4, 2), (4, 3), (4, 4)]



In [3]:
# Close the environment.
# NOTE(review): presumably this tears down the rendering enabled earlier — confirm.
env.close()

In [4]:
# Render the environment again after it has been closed in the previous cell.
env.render()

In [5]:
# Reset with state=(1,1) and print the returned (state, info) pair.
# NOTE(review): the recorded output shows theta = [1, 1] for this call, so the
# argument presumably selects the deformation — confirm against the environment.
print(env.reset(state=(1,1)))
# Show the goal position after the reset, then draw the environment.
print(env.goal_pos)
env.render()

(OrderedDict([('x', tensor([3], dtype=torch.int32)), ('y', tensor([11], dtype=torch.int32)), ('phi', tensor([3], dtype=torch.int32)), ('theta', tensor([1, 1]))]), {})
[3 3]


In [6]:
def s_dict_to_array(s):
    """Flatten a state mapping into the list [x, y, phi, theta0, theta1].

    Args:
        s: Mapping with entries 'x', 'y', 'phi' (each exposing ``.item()``) and
            'theta' (indexable, with elements exposing ``.item()``).

    Returns:
        A plain Python list with the five scalar values.
    """
    values = [s[key].item() for key in ('x', 'y', 'phi')]
    theta = s['theta']
    values += [theta[0].item(), theta[1].item()]
    return values

def s_tuple_to_array(s):
    """Flatten a ((x, y, phi), (theta0, theta1)) state into a 5-element array.

    Args:
        s: Pair whose first item holds x, y, phi and whose second item holds
            the two deformation components.

    Returns:
        ``np.ndarray`` laid out as [x, y, phi, theta0, theta1].
    """
    pose = s[0]
    theta = s[1]
    flat = [pose[0], pose[1], pose[2], theta[0], theta[1]]
    return np.array(flat)

def create_dataset(env: MDPGYMGridEnvDeform, size: int = 100):
    """Collect paired observations of one pose under different deformations.

    For each of ``size`` attempts the environment is reset with ``state=(1,1)``
    and its observation is recorded. Attempts whose x or y is not smaller than
    the corresponding dimension of ``env.original_maze`` are dropped. Every kept
    attempt yields one entry per element of ``env.deformations``.

    Args:
        env: Environment providing ``reset``, ``get_observation``,
            ``original_maze`` and ``deformations``.
        size: Number of reset attempts (not the number of returned entries).

    Returns:
        List of dicts with keys 'obs', 'obs_new', 'theta', 's_new' and 's'.
        All entries coming from the same attempt share the same 'obs' and 's'
        objects.
    """
    samples = []
    for _ in tqdm(range(size)):
        raw_state, _info = env.reset(state=(1,1))
        base_obs = env.get_observation()
        base_state = s_dict_to_array(raw_state)

        # discard any position that is not in the maze
        outside = (
            base_state[0] >= env.original_maze.shape[0]
            or base_state[1] >= env.original_maze.shape[1]
        )
        if outside:
            continue

        for theta in env.deformations:
            # Remap the position by scaling (x, y) elementwise with theta; the
            # original author flagged this remapping as an open question.
            scaled_xy = base_state[:2] * np.array(theta)
            pose = np.append(scaled_xy, base_state[2])
            deformed_state = (tuple(pose), theta)

            samples.append(dict(
                obs=base_obs,
                obs_new=env.get_observation(deformed_state),
                theta=theta,
                s_new=s_tuple_to_array(deformed_state),
                s=base_state,
            ))

    return samples

# Run 10000 reset attempts. The resulting list length differs from 10000:
# attempts can be discarded, and each kept attempt adds one entry per deformation.
dataset = create_dataset(env, 10000)

100%|██████████| 10000/10000 [00:02<00:00, 4871.33it/s]


In [7]:
print(len(dataset))
# random.randint includes its upper bound, so randint(0, len(dataset)) could
# return len(dataset) and raise IndexError; randrange excludes the upper bound.
index = random.randrange(len(dataset))
dataset[index]


4448


{'obs': array([0, 0, 0, 0, 1]),
 'obs_new': array([0, 0, 1, 1, 1]),
 'theta': (2, 2),
 's_new': array([4, 4, 3, 2, 2]),
 's': [2, 2, 3, 1, 1]}

# NN

In [23]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np

class CustomDataset(Dataset):
    """Torch ``Dataset`` view over a list of sample dictionaries.

    Each sample is read through the keys 'obs', 'obs_new', 'theta' and 's_new';
    any other key is ignored. 'obs' and 'obs_new' must expose ``.copy()``.
    """

    def __init__(self, data_list):
        """Store the list of sample dictionaries without copying it."""
        self.data_list = data_list

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.data_list)

    def __getitem__(self, idx):
        """Return sample ``idx`` as an ``(inputs, target)`` pair of dicts.

        Args:
            idx (int): Index of the sample to retrieve.

        Returns:
            ``inputs`` holds float32 tensors under 'o_new' (from 'obs_new'),
            'theta' and 'qpos_new' (from 's_new'); ``target`` holds the float32
            tensor 'o' (from 'obs').
        """
        sample = self.data_list[idx]

        def as_float(value):
            # Every field is turned into a fresh float32 tensor.
            return torch.tensor(value, dtype=torch.float32)

        obs_t = as_float(sample['obs'].copy())
        theta_t = as_float(sample['theta'])
        state_t = as_float(sample['s_new'])
        obs_new_t = as_float(sample['obs_new'].copy())

        inputs = {'o_new': obs_new_t, 'theta': theta_t, 'qpos_new': state_t}
        target = {'o': obs_t}
        return inputs, target


# Wrap the list of sample dictionaries in the torch Dataset defined above.
custom_dataset = CustomDataset(dataset)

# Serve shuffled mini-batches of 128 samples.
data_loader = DataLoader(custom_dataset, batch_size=128, shuffle=True)

# Sanity check: print the shapes and contents of the first batch only.
# `inputs` and `target` stay bound at module level after the loop.
for inputs, target in data_loader:
    print((inputs['o_new'].shape,inputs['theta'].shape))
    print(inputs['o_new'])
    print(inputs['theta'])
    print(target['o'])
    break



(torch.Size([128, 5]), torch.Size([128, 2]))
tensor([[1., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0.],
        [0., 1., 1., 0., 0.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 1.],
        [0., 0., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 0.],
        [0., 1., 1., 1., 0.],
        [0., 0., 0., 0., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 0., 0.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 0., 0.],
        [0., 0., 1., 1., 1.],
        [1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1.],
        [0., 0., 1., 0., 0.],
        [1., 1., 1., 1., 1.],
        [0., 1., 1., 1., 0.],
        [1., 1., 1., 1., 1.],
        [1., 0., 0., 0., 

In [37]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class NN(nn.Module):
    """Four-layer fully connected network producing per-element probabilities.

    The two inputs are concatenated along dim 1, passed through three
    128-unit ReLU layers and a final linear layer, then squashed by a sigmoid
    so every output lies in [0, 1].

    Args:
        input_size: Width of the concatenated input (x width + theta width).
        output_size: Number of output probabilities.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 128)
        self.fc4 = nn.Linear(128, output_size)

    def forward(self, x, theta):
        """Return sigmoid outputs for a batch.

        Args:
            x: 2-D tensor (batch first).
            theta: 2-D tensor with the same batch size as ``x``.

        Returns:
            Tensor of shape (batch, output_size) with values in [0, 1].
        """
        y = torch.cat((x, theta), dim=1)
        y = F.relu(self.fc1(y))
        y = F.relu(self.fc2(y))
        y = F.relu(self.fc3(y))
        y = self.fc4(y)
        return torch.sigmoid(y)

    def convert(self, x, theta):
        """Run a forward pass without gradient tracking.

        Accepts tensors as well as array-likes (e.g. numpy arrays). Inputs are
        converted to the dtype and device of the model parameters; tensors that
        already match are passed through unchanged.

        Args:
            x: Batch of inputs, 2-D tensor or array-like.
            theta: Batch of deformations, 2-D tensor or array-like.

        Returns:
            Tensor of shape (batch, output_size) detached from the graph.
        """
        # The original code inspected the unrelated global `obs` and called a
        # non-existent `to_tensor()`; convert the actual arguments instead.
        reference = next(self.parameters())
        x = torch.as_tensor(x, dtype=reference.dtype, device=reference.device)
        theta = torch.as_tensor(
            theta, dtype=reference.dtype, device=reference.device
        )
        with torch.no_grad():
            return self(x, theta)


In [38]:
# train network 
import torch.optim as optim
from tqdm.notebook import tqdm
# Instantiate the model: forward() concatenates the observation with theta, so
# the input width is 7 (5 observation bits + 2 theta components, as in the batch
# shapes printed above) and the output is one probability per observation bit.
model = NN(7,5)


# Binary cross-entropy between the sigmoid outputs and the 0/1 targets
criterion = nn.BCELoss()

# Adam over all model parameters
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Pick the GPU when available, move the model there and switch to training mode
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.train()

# One progress bar sized to a single epoch; it is reset after every epoch
pbar = tqdm(total=len(data_loader),desc="Training")
pbar.reset()

# Train for 22 epochs over the DataLoader
for epoch in range(22):
    # Sum of the per-batch losses of the current epoch
    running_loss = 0.0
    pbar.set_description(f"Epoch {epoch}")
    for i, (inputs, target) in enumerate(data_loader):
        # Zero the parameter gradients
        optimizer.zero_grad()
        
        # Forward pass on the deformed observation and its theta
        outputs = model(inputs['o_new'].to(device),inputs['theta'].to(device))

        # Compute the loss against the undeformed observation
        loss = criterion(outputs, target['o'].to(device))

        # Accumulate the batch loss for reporting, then backpropagate
        running_loss += loss.item()
        loss.backward()

        # Update the weights
        optimizer.step()

        pbar.update(1)
    pbar.reset()
    # Mean per-batch loss of the epoch
    print("runningLoss:", running_loss/len(data_loader))
    


Training:   0%|          | 0/35 [00:00<?, ?it/s]

runningLoss: 0.6694507496697563
runningLoss: 0.5767305186816625
runningLoss: 0.5028055914810725
runningLoss: 0.45215007747922625
runningLoss: 0.41584564191954476
runningLoss: 0.4003335441861834
runningLoss: 0.38750416806765964
runningLoss: 0.37209938423974176
runningLoss: 0.3644781538418361
runningLoss: 0.36324107561792646
runningLoss: 0.35562406948634556
runningLoss: 0.35460459589958193
runningLoss: 0.34963787879262653
runningLoss: 0.3525148408753531
runningLoss: 0.3509923483644213
runningLoss: 0.3468981189387185
runningLoss: 0.34498132807867865
runningLoss: 0.3505895435810089
runningLoss: 0.3447431445121765
runningLoss: 0.3426938014371054
runningLoss: 0.3437144534928458
runningLoss: 0.34113441961152213


In [39]:
# Predicted per-bit probabilities (no gradient tracking) for the batch currently
# bound to `inputs`.
model.convert(inputs['o_new'].to(device), inputs['theta'].to(device))

tensor([[9.9966e-01, 2.6625e-01, 2.6711e-01, 1.0000e+00, 9.9997e-01],
        [4.3664e-01, 2.4124e-01, 3.3297e-01, 2.4337e-01, 3.2795e-01],
        [4.9058e-01, 3.4631e-02, 3.6595e-02, 4.4373e-01, 5.0112e-01],
        [3.9065e-01, 3.9768e-01, 4.7873e-01, 3.1756e-01, 3.2006e-01],
        [9.9983e-01, 1.1416e-05, 7.3028e-05, 5.7830e-03, 3.2041e-03],
        [3.4766e-03, 9.9984e-01, 9.9730e-01, 9.8471e-01, 3.7504e-01],
        [5.0345e-01, 6.1568e-01, 4.9694e-01, 5.8481e-01, 4.8145e-01],
        [9.9949e-01, 9.9975e-01, 6.7739e-06, 4.9520e-05, 9.9991e-01],
        [8.4002e-04, 5.9936e-01, 9.9421e-01, 4.7782e-01, 8.2292e-03],
        [4.6005e-03, 8.9448e-03, 4.1150e-01, 1.0907e-02, 3.4018e-03],
        [7.9257e-01, 6.0732e-01, 3.9608e-01, 1.8459e-03, 1.9594e-02],
        [6.2502e-01, 7.2007e-01, 5.1207e-01, 7.9141e-01, 6.2942e-01],
        [6.2502e-01, 7.2007e-01, 5.1207e-01, 7.9141e-01, 6.2942e-01],
        [6.4234e-05, 1.0000e+00, 9.9999e-01, 9.9996e-01, 9.9864e-01],
        [9.9995e-01,

In [40]:
# Draw one binary observation per row by sampling each bit independently from a
# Bernoulli distribution parameterised by the predicted probabilities.
torch.distributions.Bernoulli(model.convert(inputs['o_new'].to(device), inputs['theta'].to(device))).sample()

tensor([[1., 1., 0., 1., 1.],
        [0., 0., 1., 0., 0.],
        [1., 0., 0., 0., 1.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 1., 1., 1., 0.],
        [0., 1., 1., 0., 1.],
        [1., 1., 0., 0., 1.],
        [0., 1., 1., 1., 0.],
        [0., 0., 1., 0., 0.],
        [0., 1., 1., 0., 0.],
        [1., 0., 1., 1., 1.],
        [1., 1., 0., 1., 1.],
        [0., 1., 1., 1., 1.],
        [1., 0., 0., 1., 1.],
        [1., 1., 0., 1., 1.],
        [1., 1., 1., 1., 1.],
        [0., 0., 1., 0., 0.],
        [1., 0., 0., 1., 1.],
        [1., 1., 1., 0., 0.],
        [0., 1., 1., 1., 0.],
        [1., 0., 0., 1., 1.],
        [0., 0., 1., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 1., 1., 1., 0.],
        [1., 1., 1., 0., 1.],
        [1., 0., 1., 0., 0.],
        [1., 0., 0., 1., 1.],
        [0., 0., 0., 0., 1.],
        [0., 1., 1., 1., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 1.],
        [0

In [32]:
# Display the target observations of the batch currently bound to `target`.
target['o']


tensor([[0., 1., 1., 1., 0.],
        [0., 1., 1., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 1.],
        [1., 1., 0., 0., 1.],
        [0., 1., 1., 0., 0.],
        [0., 1., 1., 1., 0.],
        [0., 1., 1., 1., 1.],
        [0., 1., 1., 1., 1.],
        [1., 0., 0., 1., 1.],
        [0., 1., 1., 1., 0.],
        [0., 0., 0., 0., 0.],
        [0., 1., 1., 0., 0.],
        [0., 0., 1., 0., 0.],
        [1., 1., 0., 1., 1.],
        [0., 1., 1., 1., 0.],
        [0., 0., 0., 0., 1.],
        [1., 1., 0., 0., 1.],
        [1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 0., 1., 1.],
        [0., 0., 0., 0., 1.],
        [1., 1., 0., 1., 1.],
        [0., 1., 1., 0., 0.],
        [1., 0., 0., 0., 0.],
        [1., 1., 0., 1., 1.],
        [1., 0., 0., 1., 1.],
        [0., 1., 1., 1., 0.],
        [0., 0., 0., 0., 0.],
        [1., 1., 0., 1., 1.],
        [1., 0., 0., 0., 0.],
        [0

In [None]:
# Print the device holding the model's first parameter.
print(next(model.parameters()).device)

In [18]:
# Evaluate the criterion on a hand-made prediction with one wrong bit out of five.
# NOTE(review): the recorded output 0.2 does not look like nn.BCELoss for these
# inputs (a prediction of exactly 0 against a target of 1 would give a much
# larger value), so this cell was presumably run with a different criterion — confirm.
criterion(torch.tensor([0.,1.,1.,1.,1.],dtype=torch.float32),torch.tensor([1.,1.,1.,1.,1.],dtype=torch.float32))

tensor(0.2000)

In [None]:
# NOTE(review): `P` is not defined in the visible part of this notebook and this
# cell is marked as not executed — define it or confirm where it comes from.
P_o_theta = P(model.convert(inputs['o_new'].to(device), inputs['theta'].to(device)))