In [62]:
#!/usr/bin/env python3
import argparse
import gym
# This will register the gym_minigrid envs
from gym_minigrid import envs, wrappers
from gym_minigrid.minigrid import OBJECT_TO_IDX, IDX_TO_OBJECT


AGENT_DIR_TO_STR = {0: ">", 1: "V", 2: "<", 3: "^"}
DEFAULT_ENV = 'MiniGrid-SimpleCrossingS9N1-v0'#'MiniGrid-SimpleCrossingS9N1-v0'  # 'MiniGrid-FourRooms-v0'

In [63]:
# Let's get the arguments
parser = argparse.ArgumentParser()
parser.add_argument("--env", type=str, default=DEFAULT_ENV, required=False, help=F"Name of the environment (default: {DEFAULT_ENV})")
args = parser.parse_args(args=[])

In [64]:
# Just play around to show what we can do
print(f"\nRunning over {args.env}")
environment = gym.make(args.env, render_mode="human") #creo el enviormente
environment = wrappers.SymbolicObsWrapper(environment)
"""
wrappers.SymbolicObsWrapper:
    Fully observable grid with a symbolic state representation.
    The symbol is a triple of (X, Y, IDX), where X and Y are
    the coordinates on the grid, and IDX is the id of the object.
"""



Running over MiniGrid-SimpleCrossingS9N1-v0


  deprecation(
  deprecation(


'\nwrappers.SymbolicObsWrapper:\n    Fully observable grid with a symbolic state representation.\n    The symbol is a triple of (X, Y, IDX), where X and Y are\n    the coordinates on the grid, and IDX is the id of the object.\n'

In [65]:
obs = environment.reset()
print("\n\nEnvironment loaded")
print("This is our world:\n")
print(obs['image'])



Environment loaded
This is our world:

[[[ 0  0  2]
  [ 0  1  2]
  [ 0  2  2]
  [ 0  3  2]
  [ 0  4  2]
  [ 0  5  2]
  [ 0  6  2]
  [ 0  7  2]
  [ 0  8  2]]

 [[ 1  0  2]
  [ 1  1 -1]
  [ 1  2 -1]
  [ 1  3 -1]
  [ 1  4 -1]
  [ 1  5 -1]
  [ 1  6  2]
  [ 1  7 -1]
  [ 1  8  2]]

 [[ 2  0  2]
  [ 2  1 -1]
  [ 2  2 -1]
  [ 2  3 -1]
  [ 2  4 -1]
  [ 2  5 -1]
  [ 2  6 -1]
  [ 2  7 -1]
  [ 2  8  2]]

 [[ 3  0  2]
  [ 3  1 -1]
  [ 3  2 -1]
  [ 3  3 -1]
  [ 3  4 -1]
  [ 3  5 -1]
  [ 3  6  2]
  [ 3  7 -1]
  [ 3  8  2]]

 [[ 4  0  2]
  [ 4  1 -1]
  [ 4  2 -1]
  [ 4  3 -1]
  [ 4  4 -1]
  [ 4  5 -1]
  [ 4  6  2]
  [ 4  7 -1]
  [ 4  8  2]]

 [[ 5  0  2]
  [ 5  1 -1]
  [ 5  2 -1]
  [ 5  3 -1]
  [ 5  4 -1]
  [ 5  5 -1]
  [ 5  6  2]
  [ 5  7 -1]
  [ 5  8  2]]

 [[ 6  0  2]
  [ 6  1 -1]
  [ 6  2 -1]
  [ 6  3 -1]
  [ 6  4 -1]
  [ 6  5 -1]
  [ 6  6  2]
  [ 6  7 -1]
  [ 6  8  2]]

 [[ 7  0  2]
  [ 7  1 -1]
  [ 7  2 -1]
  [ 7  3 -1]
  [ 7  4 -1]
  [ 7  5 -1]
  [ 7  6  2]
  [ 7  7  8]
  [ 7  8  2]]

 [[ 8  

In [66]:
print("\nThis is a list of rows (X is constant). Each row is a list of cells. Each cell has three components.")
print("First and second components are X-axis and Y-axis coordinates (column, row), but inverted. Third component is what there is in the cell (value -1 equals nothing):\n")
for object, idx in OBJECT_TO_IDX.items():
    print(f"\t{object}: {idx}")


This is a list of rows (X is constant). Each row is a list of cells. Each cell has three components.
First and second components are X-axis and Y-axis coordinates (column, row), but inverted. Third component is what there is in the cell (value -1 equals nothing):

	unseen: 0
	empty: 1
	wall: 2
	floor: 3
	door: 4
	key: 5
	ball: 6
	box: 7
	goal: 8
	lava: 9
	agent: 10


In [93]:
# Check cells (y conseguir la pos del agente)
print(
    f"\nThe agent is now in {environment.agent_pos[0], environment.agent_pos[1]}.")



The agent is now in (2, 3).


In [68]:
def print_world(image, agent_dir, agent_pos):
    for y_axis in image:
        print("\n\t")
        for cell in y_axis:
            cell_render = AGENT_DIR_TO_STR[agent_dir] if (cell[1] == agent_pos[0] and cell[0] == agent_pos[1]) \
                else IDX_TO_OBJECT[cell[2]][0].upper() if cell[2] > -1 else '_'
            print(cell_render, end='   ')

In [150]:
#So the world looks like
print("\nSo this is our pretty version of our world: ")
print_invert_world(invert_world(obs['image']), environment.agent_dir, agent_pos)


So this is our pretty version of our world: 

	
W   W   W   W   W   W   W   W   W   
	
W   ^   _   _   _   _   W   _   W   
	
W   _   _   _   _   _   _   _   W   
	
W   _   _   _   _   _   W   _   W   
	
W   _   _   _   _   _   W   _   W   
	
W   _   _   _   _   _   W   _   W   
	
W   _   _   _   _   _   W   _   W   
	
W   _   _   _   _   _   W   G   W   
	
W   W   W   W   W   W   W   W   W   

In [170]:
#So the world looks like
print("\nSo this is our pretty version of our world: ")
print_world(obs['image'], environment.agent_dir, environment.agent_pos)




So this is our pretty version of our world: 

	
W   W   W   W   W   W   W   W   W   
	
W   _   _   _   _   _   W   _   W   
	
W   _   _   _   _   _   _   _   W   
	
W   _   _   _   _   _   W   _   W   
	
W   _   _   _   _   _   W   _   W   
	
W   _   _   _   _   _   W   _   W   
	
W   _   _   _   _   _   W   V   W   
	
W   _   _   _   _   _   W   G   W   
	
W   W   W   W   W   W   W   W   W   

In [71]:
"""def find_wall(pos_ini,pos_fin,vec):
    if pos_ini < pos_fin:
        while pos_ini<pos_fin:
            if vec[pos_ini] == 2:
                wall.append(pos_fin-pos_ini)
                break;
            else:
                i+=1
    else:       
        while pos_ini>pos_fin:
            if vec[pos_ini] == 2:
                wall.append(pos_ini-pos_fin)
                break;
            else:
                pos_ini-=1
                """

'def find_wall(pos_ini,pos_fin,vec):\n    if pos_ini < pos_fin:\n        while pos_ini<pos_fin:\n            if vec[pos_ini] == 2:\n                wall.append(pos_fin-pos_ini)\n                break;\n            else:\n                i+=1\n    else:       \n        while pos_ini>pos_fin:\n            if vec[pos_ini] == 2:\n                wall.append(pos_ini-pos_fin)\n                break;\n            else:\n                pos_ini-=1\n                '

In [72]:
def matrix_value(i, j, matrix):
    return matrix[j][i][2]


In [96]:
def goal_distance(agent_pos_x, agent_pos_y, world):
    l = len(world)
    for i in range(0, l):
        for j in range(0, l):
            if matrix_value(i, j, world) == 8:
                return [abs(i-agent_pos_x), abs(j-agent_pos_y)]

In [74]:
# [wall_east,wall_south,wall_west,wall_north] -------------- NOT INVERTED !
def walls_axis(agent_pos_x, agent_pos_y, world):
    l = len(world)
    wall = []
    curiosity = 3
    for i in range(agent_pos_x, l):
        if matrix_value(i, agent_pos_y, world) == 2 or matrix_value(i, agent_pos_y, world) == 9:  # wall or lava
            wall.append(i-agent_pos_x)
            break
        elif matrix_value(i, agent_pos_y, world) == 0:  # unseen # TODO: check var curiosity
            wall.append(i-agent_pos_x + curiosity)
            break
    for j in range(agent_pos_y, l):
        if matrix_value(agent_pos_x, j, world) == 2 or matrix_value(i, agent_pos_y, world) == 9:  # wall or lava
            wall.append(j-agent_pos_y)
            break
        elif matrix_value(agent_pos_x, j, world) == 0:  # unseen
            wall.append(j-agent_pos_y + curiosity)
            break

    for i in range(agent_pos_x, -1, -1):
        if matrix_value(i, agent_pos_y, world) == 2 or matrix_value(i, agent_pos_y, world) == 9:  # wall or lava
            wall.append(agent_pos_x-i)
            break
        elif matrix_value(i, agent_pos_y, world) == 0:  # unseen
            wall.append(agent_pos_x-i + curiosity)
            break

    for j in range(agent_pos_y, -1, -1):
        if matrix_value(agent_pos_x, j, world) == 2 or matrix_value(agent_pos_x, j, world) == 9:  # wall or lava
            wall.append(agent_pos_y-j)
            break
        elif matrix_value(agent_pos_x, j, world) == 0:  # unseen
            wall.append(agent_pos_y-j + curiosity)
            break

    return wall


In [75]:
#calcula la distancia a cada parede segun su sentido de la orietacion
def walls(agent_pos_x,agent_pos_y,world): #world -> obs['image'] invertido
    walls = walls_axis(agent_pos_x,agent_pos_y,world)
    ad = environment.agent_dir
    wall_dirs = []
    for i in range(ad,ad+4):
        wall_dirs.append(walls[i%4])
    # print(walls)#(este, sur, oeste, norte)
    return wall_dirs #(frente,derecha,atras,izquierda)

In [149]:
print('walls distance frbl ', walls(
    environment.agent_pos[0], environment.agent_pos[1], obs['image']))
print('goal distance ', goal_distance(
    environment.agent_pos[0], environment.agent_pos[1], obs['image']))


walls distance frbl  [5, 1, 3, 1]
goal distance  [0, 2]


In [169]:
environment.step(environment.actions.right)



({'image': array([[[ 0,  0,  2],
          [ 0,  1,  2],
          [ 0,  2,  2],
          [ 0,  3,  2],
          [ 0,  4,  2],
          [ 0,  5,  2],
          [ 0,  6,  2],
          [ 0,  7,  2],
          [ 0,  8,  2]],
  
         [[ 1,  0,  2],
          [ 1,  1, -1],
          [ 1,  2, -1],
          [ 1,  3, -1],
          [ 1,  4, -1],
          [ 1,  5, -1],
          [ 1,  6,  2],
          [ 1,  7, -1],
          [ 1,  8,  2]],
  
         [[ 2,  0,  2],
          [ 2,  1, -1],
          [ 2,  2, -1],
          [ 2,  3, -1],
          [ 2,  4, -1],
          [ 2,  5, -1],
          [ 2,  6, -1],
          [ 2,  7, -1],
          [ 2,  8,  2]],
  
         [[ 3,  0,  2],
          [ 3,  1, -1],
          [ 3,  2, -1],
          [ 3,  3, -1],
          [ 3,  4, -1],
          [ 3,  5, -1],
          [ 3,  6,  2],
          [ 3,  7, -1],
          [ 3,  8,  2]],
  
         [[ 4,  0,  2],
          [ 4,  1, -1],
          [ 4,  2, -1],
          [ 4,  3, -1],
          [ 4, 

In [172]:

n_obs, reward, done, _ = environment.step(environment.actions.forward)
print('done',done) # RETURNS IN THE LAST MOVEMENT

done False


In [79]:
print(environment)


False
