In [22]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from pogema import pogema_v0, GridConfig
from gym.core import ObservationWrapper, RewardWrapper
from gym.spaces import Box

In [23]:
# Scenario configuration used to build the POGEMA environment below.
grid_config = GridConfig(
    num_agents=9,               # number of agents
    size=30,                    # size of the grid
    density=0.1,                # obstacle density
    seed=1,                     # None => random obstacles, agents and target positions at each reset
    max_episode_steps=10 ** 3,  # horizon
    obs_radius=5,               # defines field of view
)

In [24]:
class PreprocessObs(ObservationWrapper):
    """Observation wrapper that hands observations back as torch tensors.

    Whatever the wrapped environment emits as an observation is packed into
    one float32 array and returned as a single ``torch.Tensor`` with the same
    values and shape.
    """

    def __init__(self, env):
        """Wrap ``env``; the wrapper keeps no state of its own."""
        super().__init__(env)

    def observation(self, img):
        """Convert a raw observation into a float32 tensor.

        Args:
            img: Array-like observation (presumably a list of equally shaped
                per-agent arrays -- TODO confirm against the wrapped env).

        Returns:
            torch.Tensor: float32 tensor holding the observation values.
        """
        # Pack everything into one ndarray first: building a tensor straight
        # from a list of ndarrays converts element by element, which PyTorch
        # itself warns is extremely slow. np.array always copies, so the
        # returned tensor never aliases buffers owned by the environment.
        return torch.from_numpy(np.array(img, dtype=np.float32))

In [25]:
def compute_reward(action, is_done):
    """Compute the shaped per-agent reward from actions and done flags.

    Where the done flag is set the reward is +20. Elsewhere a step cost is
    charged: -0.3 when the action equals 0 and -0.5 for any other action.

    Args:
        action: Action index per agent; a scalar, sequence, ndarray or tensor.
        is_done: Done flag per agent, with the same shape as ``action`` (or
            broadcastable to it).

    Returns:
        torch.Tensor: floating-point rewards with the broadcast shape of the
        two inputs.
    """
    # torch.as_tensor rather than the legacy torch.Tensor(...) constructor:
    # torch.Tensor(3) allocates an *uninitialised* tensor of length 3 instead
    # of wrapping the value 3, and it refuses tensors of a non-float dtype.
    dtype = torch.get_default_dtype()
    done = torch.as_tensor(is_done, dtype=dtype)
    action = torch.as_tensor(action, dtype=dtype)

    # Exactly one of the two masks is true for every entry.
    step_cost = -0.3 * (action == 0) - 0.5 * (action != 0)

    # The step cost applies only while not done; done entries get the bonus.
    return (1 - done) * step_cost + 20 * done

In [26]:
# Smoke-test the raw POGEMA environment: build it from `grid_config`, reset
# it, and render the grid (the rendering is this cell's output).
# NOTE(review): the name `env` is rebound to a My_env instance in a later
# cell, so it refers to two different kinds of object within this notebook.
env = pogema_v0(grid_config=grid_config)
env.reset()
env.render()

[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m
[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m[37m . [0m
[37m . [0m[37m . [0m[37m . [0m[

In [27]:
class My_env:
    """Thin multi-agent wrapper around a POGEMA environment.

    Observations are returned as torch tensors (through ``PreprocessObs``)
    and the reward reported by the environment is replaced by the value of a
    user-supplied reward function.

    Attributes:
        env: the wrapped, observation-preprocessed environment.
        n_agents: number of agents, read from the grid configuration.
        compute_reward: callable ``(action, is_done) -> reward`` used by
            ``step``.
    """

    def __init__(self, grid_config=None, compute_reward=compute_reward):
        """Build and wrap the environment.

        Args:
            grid_config: ``GridConfig`` to use. ``None`` (the default) builds
                a fresh config: 9 agents, grid size 30, obstacle density 0.1,
                seed 1, a horizon of 1000 steps and observation radius 5.
            compute_reward: reward function, called on every step as
                ``compute_reward(action, is_done)``.
        """
        # Build the default per instance. A GridConfig written directly in
        # the signature is created once, when the class is defined, and the
        # same object is then shared by every My_env().
        if grid_config is None:
            grid_config = GridConfig(
                num_agents=9,               # number of agents
                size=30,                    # size of the grid
                density=0.1,                # obstacle density
                seed=1,                     # None => random layout at each reset
                max_episode_steps=10 ** 3,  # horizon
                obs_radius=5,               # defines field of view
            )

        env = pogema_v0(grid_config=grid_config)
        # NOTE(review): this reset happens before wrapping and looks redundant
        # with the caller's own reset(); kept to preserve behaviour.
        env.reset()
        self.env = PreprocessObs(env)
        self.n_agents = grid_config.num_agents
        # Keep the injected reward function so step() honours it instead of
        # always falling back to the module-level compute_reward.
        self.compute_reward = compute_reward

    def reset(self):
        """Reset the wrapped environment and return its initial observation."""
        return self.env.reset()

    def step(self, action):
        """Advance the environment by one step.

        Args:
            action: actions passed unchanged to the wrapped environment.

        Returns:
            tuple: ``(next_s, reward, is_done, info)`` where ``reward`` comes
            from ``self.compute_reward(action, is_done)``; the reward produced
            by the wrapped environment is discarded.
        """
        next_s, _, is_done, info = self.env.step(action)
        reward = self.compute_reward(action, is_done)
        return next_s, reward, is_done, info

    def show_state(self, s):
        """Plot every entry of ``s`` as an image in one figure.

        Args:
            s: iterable of per-agent observations, each either an image that
                ``imshow`` accepts directly or a channel-first array with
                3 or 4 channels.
        """
        n_cols = 3
        # Up to 9 entries use a 3x3 layout; more entries add rows instead of
        # overflowing the subplot grid.
        n_rows = max(3, -(-len(s) // n_cols))

        plt.figure(figsize=[16, 9])
        for i, k in enumerate(s):
            plt.subplot(n_rows, n_cols, i + 1)

            img = np.asarray(k)
            # imshow only accepts (H, W), (H, W, 3) or (H, W, 4). Move a
            # leading channel axis to the end so each channel becomes one
            # colour plane; anything imshow already accepts is left alone.
            if img.ndim == 3 and img.shape[-1] not in (3, 4) and img.shape[0] in (3, 4):
                img = np.moveaxis(img, 0, -1)

            plt.imshow(img)
            plt.grid()

        plt.show()

In [28]:
# Build the wrapped environment with its default configuration and display
# the initial observation tensor returned by reset().
# NOTE(review): this rebinds `env`, which an earlier cell used for the raw
# pogema_v0 environment.
env = My_env()
env.reset()

tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 1.],
          [0., 1., 0.,  ..., 1., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],

         [[0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]],

         [[1., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          ...,
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.],
          [0., 0., 0.,  ..., 0., 0., 0.]]],


        [[[0., 0., 0.,  ..., 0., 0., 0.],
          [1., 0., 0.,  ..., 0., 1., 0.],
          [0., 0., 0.,  ..., 1., 0., 0.],
          ...,
          [0., 1., 0.,  ..., 0., 0., 0.],
        

In [29]:
# Take a single step with one independently sampled action per agent; the
# cell displays the (next_s, reward, is_done, info) tuple returned by step().
# NOTE(review): action_space.sample() is not seeded in this notebook, so the
# sampled actions presumably differ between runs -- confirm.
action = [env.env.action_space.sample() for _ in range(env.n_agents)]
env.step(action)

(tensor([[[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [1., 0., 1.,  ..., 0., 1., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]],
 
          [[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]],
 
          [[1., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           ...,
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.],
           [0., 0., 0.,  ..., 0., 0., 0.]]],
 
 
         [[[0., 0., 0.,  ..., 0., 0., 0.],
           [0., 1., 0.,  ..., 0., 0., 1.],
           [0., 0., 0.,  ..., 0., 1., 0.],
           ...,
           [0., 0., 1