Merge pull request #2 from eidelen/randomsize

Randomsize

eidelen committed Jun 5, 2023
2 parents 5b77b12 + 94f45a3 commit 46a7ccd
Showing 13 changed files with 223 additions and 114 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/testBomberEnv.yml
@@ -9,7 +9,7 @@ jobs:
strategy:
max-parallel: 4
matrix:
python-version: [3.9]
python-version: ['3.10']

steps:
- uses: actions/checkout@v1
67 changes: 44 additions & 23 deletions bomberworld.py
@@ -2,11 +2,12 @@
# Author: Adrian Schneider, armasuisse
# Note: Initial copied from Giacomo Del Rio, IDSIA

from typing import Optional, Tuple
from typing import Optional, Tuple, List

import gymnasium as gym
import numpy as np
import copy
from random import randrange

# Best performance when size = 10 and no penalty on moving and allowed being close to bomb
# 10 x 10 = 100 stones - 6 = 94
@@ -15,7 +16,7 @@

class BomberworldEnv(gym.Env):

def __init__(self, size: int, max_steps: int, indestructible_agent=True, dead_near_bomb=False, dead_when_colliding=False, reduced_obs=False, move_penalty=-0.2, collision_penalty=-1.0,
def __init__(self, size: int | List[int], max_steps: int, indestructible_agent=True, dead_near_bomb=False, dead_when_colliding=False, reduced_obs=False, move_penalty=-0.2, collision_penalty=-1.0,
bomb_penalty=-1.0, close_bomb_penalty=-2.0, rock_reward=1.0, end_game_reward=10.0 ):
"""
Parameters
@@ -45,49 +46,69 @@ def __init__(self, size: int, max_steps: int, indestructible_agent=True, dead_ne
self.end_game_reward = end_game_reward

self.size = size
self.board_size = None
self.max_steps = max_steps
self.indestructible_agent = indestructible_agent
self.dead_near_bomb = dead_near_bomb
self.dead_when_colliding = dead_when_colliding
self.reduced_obs = reduced_obs
self.current_step = 0

self.agent_pos = (0, 0)
self.stones = np.full((self.size, self.size), True)
self.active_bombs = []

if self.reduced_obs:
self.observation_space = gym.spaces.Box(low=0, high=1, shape=(3 * 3,), dtype=np.float32)
else:
self.observation_space = gym.spaces.Box(low=0, high=1, shape=(size * size,), dtype=np.float32)
self.action_space = gym.spaces.Discrete(5)

# print info
print("Simple Bomber World") if self.indestructible_agent else print("Complex Bomber World")


def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) -> Tuple[np.ndarray, dict]:
super().reset(seed=seed)
self.current_step = 0
self.set_initial_board(tuple(self.np_random.integers(low=0, high=self.size, size=2)))

if type(self.size) is list: # randomly select a board size from the list
self.board_size = self.size[randrange(len(self.size))]

# normalize penalties and rewards relative to first size in list
main_size = self.size[0]

# per-cell reward = (total reward with main_size) / board_size**2
self.current_move_penalty = (self.move_penalty * (main_size ** 2)) / (self.board_size ** 2)
self.current_collision_penalty = (self.collision_penalty * (main_size ** 2)) / (self.board_size ** 2)
self.current_bomb_penalty = (self.bomb_penalty * (main_size ** 2)) / (self.board_size ** 2)
self.current_close_bomb_penalty = (self.close_bomb_penalty * (main_size ** 2)) / (self.board_size ** 2)
self.current_rock_reward = (self.rock_reward * (main_size ** 2)) / (self.board_size ** 2)
self.current_max_steps = (self.max_steps / (main_size ** 2)) * (self.board_size ** 2) # increase with board size
self.current_end_game_reward = self.end_game_reward # endgame reward independent of board size
else:
self.board_size = self.size
self.current_move_penalty = self.move_penalty
self.current_collision_penalty = self.collision_penalty
self.current_bomb_penalty = self.bomb_penalty
self.current_close_bomb_penalty = self.close_bomb_penalty
self.current_rock_reward = self.rock_reward
self.current_max_steps = self.max_steps
self.current_end_game_reward = self.end_game_reward # endgame reward independent of board size

self.set_initial_board(self.board_size, tuple(self.np_random.integers(low=0, high=self.board_size, size=2)))
return self.make_observation(), {}

def set_initial_board(self, agent_pos):
self.stones = np.full((self.size, self.size), True)
def set_initial_board(self, size, agent_pos):
self.stones = np.full((size, size), True)
self.agent_pos = agent_pos
self.active_bombs = []

# initially remove all 8 stones around the agent
self.bomb_3x3(agent_pos)

def is_valid_pos(self, pos: Tuple[int, int]) -> bool:
m, n = pos
return (-1 < m < self.size) and (-1 < n < self.size)
return (-1 < m < self.board_size) and (-1 < n < self.board_size)

def can_move_to_pos(self, pos: Tuple[int, int]) -> bool:
return self.is_valid_pos(pos) and (not self.stones[pos])

def make_current_board_2D(self) -> np.ndarray:
board = np.zeros((self.size, self.size), dtype=np.float32)
board = np.zeros((self.board_size, self.board_size), dtype=np.float32)
# set rocks
for m, n in np.ndindex(self.stones.shape):
board[(m, n)] = self.rock_val if self.stones[(m, n)] else self.empty_val
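Note on the scaling introduced in reset() above: a minimal arithmetic sketch (not part of the commit), using the values that appear later in solver.py — reference size 6, a sampled board size of 8, and the default move_penalty of -0.2:

# Sketch of the reward scaling done in reset(), assuming size = [6, 8].
main_size = 6        # first entry of the size list, the reference board
board_size = 8       # size sampled for this episode
move_penalty = -0.2

# The per-step penalty shrinks so that penalty * number_of_cells stays
# the same as on the 6x6 reference board:
current_move_penalty = (move_penalty * main_size ** 2) / board_size ** 2
print(current_move_penalty)  # -0.1125 instead of -0.2 on the 8x8 board

# The step budget grows with the board instead:
max_steps = 40
current_max_steps = (max_steps / main_size ** 2) * board_size ** 2
print(current_max_steps)     # ~71.1 steps on the 8x8 board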
@@ -105,9 +126,9 @@ def make_observation_2D(self) -> np.ndarray:
if self.reduced_obs: # cut 3x3 patch around agent
m_ap, n_ap = self.agent_pos
m_center = max(1, m_ap)
m_center = min(self.size-2, m_center)
m_center = min(self.board_size - 2, m_center)
n_center = max(1, n_ap)
n_center = min(self.size - 2, n_center)
n_center = min(self.board_size - 2, n_center)
return board[m_center-1:m_center+2, n_center-1:n_center+2]
else:
return board
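The min/max clamping above shifts the 3x3 window inward at edges and corners so the patch always stays on the board. A standalone sketch of that index logic (hypothetical helper, not in the commit), assuming a 6x6 board:

import numpy as np

def clip_3x3_patch(board, agent_pos):
    # Mirror of the reduced_obs logic: clamp the patch centre into
    # [1, board_size - 2] so the 3x3 slice never leaves the board.
    size = board.shape[0]
    m_ap, n_ap = agent_pos
    m_center = min(max(1, m_ap), size - 2)
    n_center = min(max(1, n_ap), size - 2)
    return board[m_center - 1:m_center + 2, n_center - 1:n_center + 2]

board = np.zeros((6, 6), dtype=np.float32)
assert clip_3x3_patch(board, (0, 0)).shape == (3, 3)  # corner agent
assert clip_3x3_patch(board, (5, 3)).shape == (3, 3)  # edge agent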
@@ -158,14 +179,14 @@ def step(self, action: int) -> Tuple[np.ndarray, float, bool, bool, dict]:

if self.can_move_to_pos(next_pos):
self.agent_pos = next_pos
reward += self.move_penalty # penalty for each move
reward += self.current_move_penalty # penalty for each move
else:
reward += self.collision_penalty
reward += self.current_collision_penalty
if self.dead_when_colliding:
agent_killed = True

elif action == 4: # drop bomb at agent location
reward += self.bomb_penalty # penalty for each dropped bomb
reward += self.current_bomb_penalty # penalty for each dropped bomb
placed_bomb = self.agent_pos
if self.indestructible_agent:
self.active_bombs.append((self.agent_pos, 0)) # immediate detonation
Expand All @@ -176,14 +197,14 @@ def step(self, action: int) -> Tuple[np.ndarray, float, bool, bool, dict]:
still_active_bombs = []
for bomb_pos, step_timer in self.active_bombs:
if step_timer <= 0:
reward += self.rock_reward * self.bomb_3x3(bomb_pos) # detonate bomb
reward += self.current_rock_reward * self.bomb_3x3(bomb_pos) # detonate bomb
exploded_bomb = bomb_pos

if not self.indestructible_agent:
# check that agent is in safe distance
squared_dist = (bomb_pos[0]-self.agent_pos[0])**2 + (bomb_pos[1]-self.agent_pos[1])**2
if squared_dist < 4.0:
reward += self.close_bomb_penalty
reward += self.current_close_bomb_penalty
if self.dead_near_bomb:
agent_killed = True
else:
Expand All @@ -193,12 +214,12 @@ def step(self, action: int) -> Tuple[np.ndarray, float, bool, bool, dict]:

# mission completed when every rock was bombed
if (self.stones == False).all():
reward += self.end_game_reward
reward += self.current_end_game_reward
terminated = True
else:
terminated = False

if self.current_step > self.max_steps or agent_killed: # end game when max step reached or agent killed
if self.current_step > self.current_max_steps or agent_killed: # end game when max step reached or agent killed
truncate = True
else:
truncate = False
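For reference, the close-bomb test in step() penalizes the agent whenever its Euclidean distance to a detonating bomb is strictly below 2 cells, i.e. the bomb cell and its 8 neighbours are unsafe. A small sketch of just that check (hypothetical helper, not in the commit):

def is_too_close(bomb_pos, agent_pos):
    # Same squared-distance test as in step(): strictly closer than
    # 2 cells (squared distance < 4) counts as too close.
    squared_dist = (bomb_pos[0] - agent_pos[0]) ** 2 + (bomb_pos[1] - agent_pos[1]) ** 2
    return squared_dist < 4.0

assert is_too_close((3, 3), (4, 4))      # diagonal neighbour, dist^2 = 2
assert not is_too_close((3, 3), (5, 3))  # two cells away, dist^2 = 4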
16 changes: 12 additions & 4 deletions bomberworld_plotter.py
@@ -29,13 +29,15 @@ def __init__(self, size: int, animated_gif_folder_path: Optional[Union[str, Path
self.ordered_file_list = []
self.agent_traj: List[Tuple[int, int]] = []
self.bomb_traj: List[Tuple[int, int]] = []
self.current_agent_pos: Tuple[int, int] = (0,0)
self.stones: np.ndarray = np.zeros((self.size, self.size), dtype=np.float32)
self.explosion: Tuple[int, int] = None
self.agent_shape = [[.2, .6], [.2, .3], [.3, .1], [.7, .1], [.8, .3], [.8, .6]]

def add_frame(self, agent_position: Tuple[int, int], placed_bomb: Tuple[int, int], exploded_bomb: Tuple[int, int], stones: np.ndarray ) -> None:
if placed_bomb is None:
self.agent_traj.append(agent_position)
self.current_agent_pos = agent_position
else:
self.bomb_traj.append(placed_bomb) # bomb placed -> agent did not move

@@ -48,8 +50,9 @@ def plot_episode(self, current_reward = None):
self.draw_grid(ax)

self.draw_stones(ax, self.stones)
self.draw_path(ax, self.agent_traj, color='red', line_width=1)
self.draw_bombs(ax, self.bomb_traj)
self.draw_current_agent_pos(ax, self.current_agent_pos)
#self.draw_path(ax, self.agent_traj, color='red', line_width=1)
#self.draw_bombs(ax, self.bomb_traj)
self.draw_agent(ax, self.agent_traj[0][0], self.agent_traj[0][1])
self.draw_explosion(ax, self.explosion)

@@ -84,7 +87,7 @@ def draw_bombs(ax: mpl.axes.Axes, bombs: List[Tuple[int, int]]):
index = 0
for m, n in bombs:
ax.add_patch(patches.Ellipse((n+0.5, m+0.5), width=0.8, height=0.8, ec="black", fill=False))
ax.text(n+0.3, m+0.6, str(index))
#ax.text(n+0.3, m+0.6, str(index))
index += 1

@staticmethod
@@ -102,6 +105,11 @@ def draw_stones(ax: mpl.axes.Axes, stones: np.ndarray):
if stones[(m,n)] < 0.1:
ax.add_patch(patches.Rectangle((n+0.125, m+0.125), width=0.75, height=0.75, ec='black', fc='grey', fill=True))

@staticmethod
def draw_current_agent_pos(ax: mpl.axes.Axes, pos: Tuple[int, int]):
m, n = pos
ax.add_patch(patches.Rectangle((n + 0.1, m + 0.1), width=0.8, height=0.8, ec='red', fc='red', fill=False, linewidth=6.0))

def draw_grid(self, ax: mpl.axes.Axes):
for i in range(self.size + 1):
ax.axhline(y=i, c='k', lw=2)
@@ -129,7 +137,7 @@ def create_animated_gif_from_episodes(self):
frames[0].save(gif_out_path, format='GIF',
append_images=frames[1:],
save_all=True,
duration=300, loop=0)
duration=75, loop=0)
print("Animated gif created, nbr imgs:", len(frames))
else:
print("Error: animated_gif_folder_path needs to be set in ctor")
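Usage sketch for the updated plotter (illustrative values, not from the commit; assumes a stand-in board array): add_frame() records a step as a move only when no bomb was placed, and plot_episode() now highlights just the current agent position instead of the full trajectory.

import numpy as np
from bomberworld_plotter import BomberworldPlotter

board = np.zeros((8, 8), dtype=np.float32)            # stand-in board
plotter = BomberworldPlotter(size=8, animated_gif_folder_path="gifs")
plotter.add_frame(agent_position=(2, 3), placed_bomb=None,
                  exploded_bomb=None, stones=board)   # agent moved
plotter.add_frame(agent_position=(2, 3), placed_bomb=(2, 3),
                  exploded_bomb=None, stones=board)   # bomb dropped, agent stayed
plotter.plot_episode()
plotter.create_animated_gif_from_episodes()           # now 75 ms per frame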
4 changes: 2 additions & 2 deletions do_bombing.py
@@ -15,10 +15,10 @@ def run_bombing(path_to_checkpoint: str, use_lstm: bool):
cell_size = 256
lstm_states = [np.zeros(cell_size, np.float32), np.zeros(cell_size, np.float32)]

env = bomberworld.BomberworldEnv(6, 60, dead_when_colliding=True, reduced_obs=True, indestructible_agent=False, dead_near_bomb=True)
env = bomberworld.BomberworldEnv(20, 2000, dead_when_colliding=True, reduced_obs=True)
o, info = env.reset()

plotter = BomberworldPlotter(size=env.size, animated_gif_folder_path="gifs")
plotter = BomberworldPlotter(size=env.board_size, animated_gif_folder_path="gifs")
plotter.add_frame(env.agent_pos, None, None, env.make_current_board_2D())

reward_sum = 0
Binary file added rsc/6,8x6,8-trained-10x10.gif
Binary file added rsc/6,8x6,8-trained-12x12.gif
Binary file added rsc/6,8x6,8-trained-20x20.gif
Binary file added rsc/6,8x6,8-trained-6x6.gif
Binary file added rsc/6,8x6,8-trained-7x7.gif
Binary file added rsc/6,8x6,8-trained-8x8.gif
Binary file added rsc/6,8x6,8-trained-9x9.gif
11 changes: 6 additions & 5 deletions solver.py
@@ -89,15 +89,16 @@ def resume_training():
if True:

# train hw:
hw = {"gpu": 0, "cpu": 3} # imac
#hw = {"gpu": 1, "cpu": 11} # adris
#hw = {"gpu": 0, "cpu": 3} # imac
hw = {"gpu": 1, "cpu": 11} # adris

env_params = {"size": 6, "max_steps": 60, "reduced_obs": True, "dead_when_colliding": True, "indestructible_agent": False, "dead_near_bomb": True}
env_params = {"size": [6, 8], "max_steps": 40, "reduced_obs": True, "dead_when_colliding": True}
#env_params = {"size": 6, "max_steps": 60, "reduced_obs": True, "dead_when_colliding": True, "indestructible_agent": False, "dead_near_bomb": True}
#env_params = {"size": 10, "max_steps": 100, "indestructible_agent": False, "dead_near_bomb": True}
# env_params = {"size": 10, "max_steps": 200, "dead_when_colliding": True, "dead_near_bomb": True, "indestructible_agent": False, "close_bomb_penalty": -1.0}
nn_model = [256, 128, 64]
activation = "relu"
description = "ReducedSmartBomber-6x6-Gamma=0.75-LSTM"
description = "ReducedSmartBomber-6to8x6to8-Gamma=0.75-LSTM"

grid_search_hypers(env_params, nn_model, activation, description, hw)
grid_search_hypers(env_params, nn_model, activation, description, hw, use_lstm=True)
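A quick construction sketch for the new list-valued size parameter (mirrors env_params above; a sketch, not part of the commit):

import bomberworld

env = bomberworld.BomberworldEnv(size=[6, 8], max_steps=40,
                                 reduced_obs=True, dead_when_colliding=True)
obs, info = env.reset()
print(env.board_size)  # 6 or 8, sampled anew for each episode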
