Changed multidiscrete to multibinary in normalization wrapper and add… #25

Merged (2 commits) on Aug 13, 2022
54 changes: 27 additions & 27 deletions diambra/arena/arena_gym.py
@@ -105,28 +105,28 @@ def env_info_process(self, env_info):
# Action dict
move_dict = {}
for idx in range(current_idx,
current_idx + 2*self.n_actions_but_comb[0], 2):
move_dict[int(env_info[idx])] = env_info[idx+1]
current_idx + 2 * self.n_actions_but_comb[0], 2):
move_dict[int(env_info[idx])] = env_info[idx + 1]

current_idx += 2*self.n_actions_but_comb[0]
current_idx += 2 * self.n_actions_but_comb[0]

attack_dict = {}
for idx in range(current_idx,
current_idx + 2*self.n_actions_but_comb[1], 2):
attack_dict[int(env_info[idx])] = env_info[idx+1]
current_idx + 2 * self.n_actions_but_comb[1], 2):
attack_dict[int(env_info[idx])] = env_info[idx + 1]

self.print_actions_dict = [move_dict, attack_dict]

current_idx += 2*self.n_actions_but_comb[1]
current_idx += 2 * self.n_actions_but_comb[1]

# Additional Obs map
number_of_add_obs = int(env_info[current_idx])
current_idx += 1
self.add_obs = {}
for idx in range(number_of_add_obs):
self.add_obs[env_info[current_idx]] = [int(env_info[current_idx+1]),
int(env_info[current_idx+2]),
int(env_info[current_idx+3])]
self.add_obs[env_info[current_idx]] = [int(env_info[current_idx + 1]),
int(env_info[current_idx + 2]),
int(env_info[current_idx + 3])]
current_idx += 4

# Return env action list
@@ -145,8 +145,8 @@ def print_actions(self):

# Return min max rewards for the environment
def get_min_max_reward(self):
return [self.minmax_reward[0]/(self.reward_normalization_value),
self.minmax_reward[1]/(self.reward_normalization_value)]
return [self.minmax_reward[0] / (self.reward_normalization_value),
self.minmax_reward[1] / (self.reward_normalization_value)]

# Step method to be implemented in derived classes
def step(self, action):
@@ -259,14 +259,14 @@ def __init__(self, env_settings):
action_space_dict = {}
for idx in range(2):
if env_settings["action_space"][idx] == "multi_discrete":
action_space_dict["P{}".format(idx+1)] =\
action_space_dict["P{}".format(idx + 1)] =\
spaces.MultiDiscrete(self.n_actions[idx])
print("Using MultiDiscrete action space for P{}".format(idx+1))
print("Using MultiDiscrete action space for P{}".format(idx + 1))
elif env_settings["action_space"][idx] == "discrete":
action_space_dict["P{}".format(idx+1)] =\
action_space_dict["P{}".format(idx + 1)] =\
spaces.Discrete(
self.n_actions[idx][0] + self.n_actions[idx][1] - 1)
print("Using Discrete action space for P{}".format(idx+1))
print("Using Discrete action space for P{}".format(idx + 1))
else:
raise Exception(
"Not recognized action space: {}".format(env_settings["action_space"][idx]))
@@ -353,13 +353,13 @@ def __init__(self, env_settings):
continue

if k[-2:] == "P1":
knew = "own"+k[:-2]
knew = "own" + k[:-2]
else:
knew = "opp"+k[:-2]
knew = "opp" + k[:-2]

# Discrete spaces (binary / categorical)
if v[0] == 0 or v[0] == 2:
player_spec_dict[knew] = spaces.Discrete(v[2]+1)
player_spec_dict[knew] = spaces.Discrete(v[2] + 1)
elif v[0] == 1: # Box spaces
player_spec_dict[knew] = spaces.Box(low=v[1], high=v[2],
shape=(), dtype=np.int32)
@@ -396,9 +396,9 @@ def add_obs_integration(self, frame, data):
continue

if k[-2:] == self.player_side:
knew = "own"+k[:-2]
knew = "own" + k[:-2]
else:
knew = "opp"+k[:-2]
knew = "opp" + k[:-2]

player_spec_dict[knew] = data[k]

@@ -451,12 +451,12 @@ def __init__(self, env_settings):
continue

if k[-2:] == "P1":
knew = "own"+k[:-2]
knew = "own" + k[:-2]
else:
knew = "opp"+k[:-2]
knew = "opp" + k[:-2]

if v[0] == 0 or v[0] == 2: # Discrete spaces
player_spec_dict[knew] = spaces.Discrete(v[2]+1)
player_spec_dict[knew] = spaces.Discrete(v[2] + 1)
elif v[0] == 1: # Box spaces
player_spec_dict[knew] = spaces.Box(low=v[1], high=v[2],
shape=(), dtype=np.int32)
@@ -500,15 +500,15 @@ def add_obs_integration(self, frame, data):
continue

if k[-2:] == elem:
knew = "own"+k[:-2]
knew = "own" + k[:-2]
else:
knew = "opp"+k[:-2]
knew = "opp" + k[:-2]

player_spec_dict[knew] = data[k]

actions_dict = {
"move": data["moveActionP{}".format(ielem+1)],
"attack": data["attackActionP{}".format(ielem+1)],
"move": data["moveActionP{}".format(ielem + 1)],
"attack": data["attackActionP{}".format(ielem + 1)],
}

player_spec_dict["actions"] = actions_dict
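
As a side note on the action-space hunk above (@@ -259,14 +259,14): the Discrete layout is sized n_moves + n_attacks - 1, presumably because both sub-spaces include a no-op and the combined space only needs to count it once. A minimal sketch with illustrative sizes, not taken from this diff:

from gym import spaces

n_actions = [9, 8]  # [move actions, attack actions], illustrative values only
multi_discrete = spaces.MultiDiscrete(n_actions)             # one move and one attack per step
discrete = spaces.Discrete(n_actions[0] + n_actions[1] - 1)  # 16 entries, shared no-op counted once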
14 changes: 7 additions & 7 deletions diambra/arena/utils/gym_utils.py
@@ -35,18 +35,18 @@ def nested_dict_obs_space(space, k_list=[], level=0):
if isinstance(v, gym.spaces.dict.Dict):
k_list = k_list[0:level]
k_list.append(k)
nested_dict_obs_space(v, k_list, level=level+1)
nested_dict_obs_space(v, k_list, level=level + 1)
else:
k_list = k_list[0:level]
out_string = "observation_space"
indentation = " "*level
indentation = " " * level
for idk in k_list:
out_string += "[\"{}\"]".format(idk)
out_string += "[\"{}\"]".format(k)
out_string = indentation+out_string+":"
out_string = indentation + out_string + ":"
print(out_string, v)
if isinstance(v, gym.spaces.MultiDiscrete):
print(indentation+"Space size:", v.nvec.shape)
print(indentation + "Space size:", v.nvec.shape)
elif isinstance(v, gym.spaces.Discrete):
pass
elif isinstance(v, gym.spaces.Box):
@@ -161,10 +161,10 @@ def show_gym_obs(observation, char_list, wait_key=1, viz=True):
observation["frame"].shape)

if viz:
obs = np.array(observation["frame"]).astype(np.float32)/255
obs = np.array(observation["frame"]).astype(np.float32) / 255
else:
if viz:
obs = np.array(observation).astype(np.float32)/255
obs = np.array(observation).astype(np.float32) / 255

if viz:
cv2.imshow("Frame", obs[:, :, ::-1]) # rgb2bgr
@@ -205,7 +205,7 @@ def show_wrap_obs(observation, n_actions_stack, char_list, wait_key=1, viz=True)

if viz:
for idx in range(obs.shape[2]):
cv2.imshow("Frame-"+str(idx), obs[:, :, idx])
cv2.imshow("Frame-" + str(idx), obs[:, :, idx])

cv2.waitKey(wait_key)

12 changes: 8 additions & 4 deletions diambra/arena/wrappers/arena_wrappers.py
@@ -92,22 +92,23 @@ def __init__(self, env, reward_normalization_factor):
:param reward_normalization_factor: multiplication factor
"""
gym.RewardWrapper.__init__(self, env)
self.env.reward_normalization_value = reward_normalization_factor*self.env.max_delta_health
self.env.reward_normalization_value = reward_normalization_factor * self.env.max_delta_health

def reward(self, reward):
"""
Normalize the reward by dividing by reward_normalization_factor * max_delta_health
:param reward: (float)
"""
return float(reward)/float(self.env.reward_normalization_value)
return float(reward) / float(self.env.reward_normalization_value)

# Environment Wrapping (rewards normalization, resizing, grayscaling, etc)


def env_wrapping(env, player, no_op_max=0, sticky_actions=1, clip_rewards=False,
reward_normalization=False, reward_normalization_factor=0.5,
frame_stack=1, actions_stack=1, scale=False, scale_mod=0,
hwc_obs_resize=[84, 84, 0], dilation=1, hardcore=False):
hwc_obs_resize=[84, 84, 0], dilation=1, flatten_dict=False,
hardcore=False):
"""
Typical standard environment wrappers
:param env: (Gym Environment) the diambra environment
@@ -145,7 +146,7 @@ def env_wrapping(env, player, no_op_max=0, sticky_actions=1, clip_rewards=False,
else:
from diambra.arena.wrappers.obs_wrapper import WarpFrame, \
WarpFrame3C, FrameStack, FrameStackDilated,\
ActionsStack, ScaledFloatObsNeg, ScaledFloatObs
ActionsStack, ScaledFloatObsNeg, ScaledFloatObs, FlattenDictObs

if hwc_obs_resize[2] == 1:
# Resizing observation from H x W x 3 to
@@ -192,4 +193,7 @@ def env_wrapping(env, player, no_op_max=0, sticky_actions=1, clip_rewards=False,
else:
raise ValueError("Scale mod must be either 0 or -1")

if flatten_dict:
env = FlattenDictObs(env)

return env
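
A minimal usage sketch of the new flatten_dict option, assuming env is an already-created DIAMBRA arena environment and leaving the remaining keyword defaults as declared above (the argument values are illustrative):

from diambra.arena.wrappers.arena_wrappers import env_wrapping

env = env_wrapping(env, player="P1",
                   reward_normalization=True,
                   frame_stack=4, actions_stack=12,
                   scale=True,
                   flatten_dict=True)  # new option added by this PR

# With flatten_dict=True, nested entries such as obs["P1"]["actions"]["move"]
# are exposed as top-level keys like obs["P1_actions_move"].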
73 changes: 50 additions & 23 deletions diambra/arena/wrappers/obs_wrapper.py
@@ -3,6 +3,7 @@
from copy import deepcopy
import numpy as np
from collections import deque
from collections.abc import Mapping
import cv2 # pytype:disable=import-error
cv2.ocl.setUseOpenCL(False)

@@ -33,7 +34,7 @@ def scaled_float_obs_func(observation, observation_space):
buf_len = observation_space.spaces[k].nvec.shape[0]
actions_vector = np.zeros((buf_len * n_act), dtype=int)
for iact in range(buf_len):
actions_vector[iact*n_act + observation[k][iact]] = 1
actions_vector[iact * n_act + observation[k][iact]] = 1
observation[k] = actions_vector
elif isinstance(v_space, spaces.Discrete) and (v_space.n > 2):
var_vector = np.zeros(
@@ -136,8 +137,7 @@ def step(self, action):

# Add last obs n_frames - 1 times in case of
# new round / stage / continueGame
if ((info["round_done"] or info["stage_done"] or info["game_done"])
and not done):
if ((info["round_done"] or info["stage_done"] or info["game_done"]) and not done):
for _ in range(self.n_frames - 1):
self.frames.append(obs["frame"])

@@ -165,7 +165,7 @@ def __init__(self, env, n_frames, dilation):
self.dilation = dilation
# Keeping all n_frames*dilation in memory,
# then extract the subset given by the dilation factor
self.frames = deque([], maxlen=n_frames*dilation)
self.frames = deque([], maxlen=n_frames * dilation)
shp = self.observation_space["frame"].shape
self.observation_space.spaces["frame"] = spaces.Box(low=0, high=255,
shape=(
@@ -174,7 +174,7 @@

def reset(self, **kwargs):
obs = self.env.reset(**kwargs)
for _ in range(self.n_frames*self.dilation):
for _ in range(self.n_frames * self.dilation):
self.frames.append(obs["frame"])
obs["frame"] = self.get_ob()
return obs
@@ -185,16 +185,15 @@ def step(self, action):

# Add last obs n_frames - 1 times in case of
# new round / stage / continueGame
if ((info["round_done"] or info["stage_done"] or info["game_done"])
and not done):
for _ in range(self.n_frames*self.dilation - 1):
if ((info["round_done"] or info["stage_done"] or info["game_done"]) and not done):
for _ in range(self.n_frames * self.dilation - 1):
self.frames.append(obs["frame"])

obs["frame"] = self.get_ob()
return obs, reward, done, info

def get_ob(self):
frames_subset = list(self.frames)[self.dilation-1::self.dilation]
frames_subset = list(self.frames)[self.dilation - 1::self.dilation]
assert len(frames_subset) == self.n_frames
return LazyFrames(list(frames_subset))

@@ -215,10 +214,10 @@ def __init__(self, env, n_actions_stack, n_players=1):
deque([0 for i in range(n_actions_stack)], maxlen=n_actions_stack))
self.attack_action_stack.append(
deque([0 for i in range(n_actions_stack)], maxlen=n_actions_stack))
self.observation_space.spaces["P{}".format(iplayer+1)].spaces["actions"].spaces["move"] =\
spaces.MultiDiscrete([self.n_actions[iplayer][0]]*n_actions_stack)
self.observation_space.spaces["P{}".format(iplayer+1)].spaces["actions"].spaces["attack"] =\
spaces.MultiDiscrete([self.n_actions[iplayer][1]]*n_actions_stack)
self.observation_space.spaces["P{}".format(iplayer + 1)].spaces["actions"].spaces["move"] =\
spaces.MultiDiscrete([self.n_actions[iplayer][0]] * n_actions_stack)
self.observation_space.spaces["P{}".format(iplayer + 1)].spaces["actions"].spaces["attack"] =\
spaces.MultiDiscrete([self.n_actions[iplayer][1]] * n_actions_stack)

def fill_stack(self, value=0):
# Fill the actions stack with no action after reset
@@ -233,30 +232,29 @@ def reset(self, **kwargs):

for iplayer in range(self.n_players):
obs["P{}".format(
iplayer+1)]["actions"]["move"] = self.move_action_stack[iplayer]
iplayer + 1)]["actions"]["move"] = self.move_action_stack[iplayer]
obs["P{}".format(
iplayer+1)]["actions"]["attack"] = self.attack_action_stack[iplayer]
iplayer + 1)]["actions"]["attack"] = self.attack_action_stack[iplayer]
return obs

def step(self, action):
obs, reward, done, info = self.env.step(action)
for iplayer in range(self.n_players):
self.move_action_stack[iplayer].append(
obs["P{}".format(iplayer+1)]["actions"]["move"])
obs["P{}".format(iplayer + 1)]["actions"]["move"])
self.attack_action_stack[iplayer].append(
obs["P{}".format(iplayer+1)]["actions"]["attack"])
obs["P{}".format(iplayer + 1)]["actions"]["attack"])

# Add noAction for n_actions_stack - 1 times
# in case of new round / stage / continueGame
if ((info["round_done"] or info["stage_done"] or info["game_done"])
and not done):
if ((info["round_done"] or info["stage_done"] or info["game_done"]) and not done):
self.fill_stack()

for iplayer in range(self.n_players):
obs["P{}".format(
iplayer+1)]["actions"]["move"] = self.move_action_stack[iplayer]
iplayer + 1)]["actions"]["move"] = self.move_action_stack[iplayer]
obs["P{}".format(
iplayer+1)]["actions"]["attack"] = self.attack_action_stack[iplayer]
iplayer + 1)]["actions"]["attack"] = self.attack_action_stack[iplayer]
return obs, reward, done, info
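
The action history used above is kept in one fixed-length deque per player and per action type; a tiny sketch of the mechanism, with an illustrative stack depth not taken from this diff:

from collections import deque

n_actions_stack = 12  # illustrative depth
move_stack = deque([0] * n_actions_stack, maxlen=n_actions_stack)
move_stack.append(4)  # newest move action evicts the oldest entry
# the wrapper exposes this history as obs["P1"]["actions"]["move"], declared as
# spaces.MultiDiscrete([n_move_actions] * n_actions_stack)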


@@ -288,10 +286,10 @@ def scaled_float_obs_space_func(obs_dict):
# One hot encoding x nStack
n_val = v.nvec.shape[0]
max_val = v.nvec[0]
obs_dict.spaces[k] = spaces.MultiDiscrete([2]*(n_val*max_val))
obs_dict.spaces[k] = spaces.MultiBinary(n_val * max_val)
elif isinstance(v, spaces.Discrete) and (v.n > 2):
# One hot encoding
obs_dict.spaces[k] = spaces.MultiDiscrete([2]*(v.n))
obs_dict.spaces[k] = spaces.MultiBinary(v.n)
elif isinstance(v, spaces.Box):
obs_dict.spaces[k] = spaces.Box(
low=0, high=1.0, shape=v.shape, dtype=np.float32)
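
The substantive change in this hunk: one-hot encoded entries are now declared as MultiBinary(n) instead of MultiDiscrete([2] * n). Both describe a length-n vector of 0/1 values; MultiBinary simply says so directly. A small check with an illustrative size, assuming a gym version whose contains() accepts integer arrays:

import numpy as np
from gym import spaces

n = 9                                      # illustrative one-hot length
old_space = spaces.MultiDiscrete([2] * n)  # declaration before this PR
new_space = spaces.MultiBinary(n)          # declaration after this PR

one_hot = np.zeros(n, dtype=np.int8)
one_hot[3] = 1
assert old_space.contains(one_hot) and new_space.contains(one_hot)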
@@ -341,3 +339,32 @@ def __len__(self):

def __getitem__(self, i):
return self.force()[i]


_FLAG_FIRST = object()

def flatten_obs_func(input_dictionary):
flattened_dict = {}

def visit(subdict, results, partial_key):
for k, v in subdict.items():
newKey = k if partial_key == _FLAG_FIRST else partial_key + "_" + k
if isinstance(v, Mapping):
visit(v, flattened_dict, newKey)
else:
flattened_dict[newKey] = v

visit(input_dictionary, flattened_dict, _FLAG_FIRST)

return flattened_dict


class FlattenDictObs(gym.ObservationWrapper):
def __init__(self, env):
gym.ObservationWrapper.__init__(self, env)

self.observation_space = spaces.Dict(flatten_obs_func(self.observation_space))

def observation(self, observation):

return flatten_obs_func(observation)
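
To illustrate how flatten_obs_func (and therefore FlattenDictObs) renames keys, a short sketch using made-up keys in the style of the wrapped observation dict:

nested = {
    "frame": "frame-array",
    "P1": {
        "actions": {"move": [0, 0], "attack": [0, 0]},
        "ownHealth": 187,
    },
}

flat = flatten_obs_func(nested)
# {'frame': 'frame-array',
#  'P1_actions_move': [0, 0],
#  'P1_actions_attack': [0, 0],
#  'P1_ownHealth': 187}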