Skip to content

Commit

Permalink
env-wrapper (#358)
Browse files Browse the repository at this point in the history
* env-wrapper

* examples fix

* minigrid update

* random frame skip support

* convolution net update

* atari env support

* configs update
  • Loading branch information
Scitator committed Sep 9, 2019
1 parent 6ef26e5 commit c23df02
Show file tree
Hide file tree
Showing 38 changed files with 259 additions and 462 deletions.
12 changes: 7 additions & 5 deletions catalyst/contrib/models/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@

import torch.nn as nn

from .sequential import SequentialNet
from catalyst.contrib.registry import MODULES
from catalyst import utils
from .sequential import SequentialNet


def get_convolution_net(
Expand All @@ -15,12 +16,12 @@ def get_convolution_net(
strides: List = None,
groups: List = None,
use_bias: bool = False,
use_normalization: bool = False,
normalization: str = None,
dropout_rate: float = None,
activation: str = "ReLU"
) -> nn.Module:

channels = channels or [16, 32, 16]
channels = channels or [32, 64, 64]
kernel_sizes = kernel_sizes or [8, 4, 3]
strides = strides or [4, 2, 1]
groups = groups or [1, 1, 1]
Expand All @@ -29,8 +30,9 @@ def get_convolution_net(

def _get_block(**conv_params):
layers = [nn.Conv2d(**conv_params)]
if use_normalization:
layers.append(nn.InstanceNorm2d(conv_params["out_channels"]))
if normalization is not None:
normalization_fn = MODULES.get_if_str(normalization)
layers.append(normalization_fn(conv_params["out_channels"]))
if dropout_rate is not None:
layers.append(nn.Dropout2d(p=dropout_rate))
layers.append(activation_fn(inplace=True))
Expand Down
4 changes: 3 additions & 1 deletion catalyst/rl/environment/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# flake8: noqa

from .gym import GymWrapper
from .environment import EnvironmentWrapper
from .gym import GymEnvWrapper
from .atari import AtariEnvWrapper
32 changes: 32 additions & 0 deletions catalyst/rl/environment/atari.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env python

from .environment import EnvironmentWrapper

from .env_wrappers import make_atari_env


class AtariEnvWrapper(EnvironmentWrapper):
    """Environment wrapper around an env built by ``make_atari_env``.

    The Atari-specific arguments are handed to ``make_atari_env``; every
    remaining keyword argument is passed on to
    ``EnvironmentWrapper.__init__`` together with the created env.
    """

    def __init__(
        self,
        env_id,
        max_episode_steps=None,
        episode_life=True,
        clip_rewards=False,
        width=84,
        height=84,
        grayscale=True,
        **params
    ):
        # Collect everything that configures the underlying Atari env.
        atari_kwargs = dict(
            env_id=env_id,
            max_episode_steps=max_episode_steps,
            episode_life=episode_life,
            clip_rewards=clip_rewards,
            width=width,
            height=height,
            grayscale=grayscale,
        )
        atari_env = make_atari_env(**atari_kwargs)
        super().__init__(env=atari_env, **params)


__all__ = ["AtariEnvWrapper"]
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from collections import deque
import numpy as np
import gym
from gym import spaces
Expand All @@ -7,6 +6,38 @@
cv2.ocl.setUseOpenCL(False)


class TransposeImage(gym.ObservationWrapper):
    """Observation wrapper that permutes the axes of image observations.

    When the wrapped env exposes a ``spaces.Dict`` observation space, only
    its ``"image"`` entry is used: the resulting observation space and the
    returned observations describe that image alone.
    """

    def __init__(self, env=None, op=(2, 0, 1)):
        """
        Transpose observation space for images

        Args:
            env: environment to wrap
            op: three axis indices; output axis ``i`` is taken from
                input axis ``op[i]``
        """
        super().__init__(env)
        assert len(op) == 3, f"Error: Operation, {str(op)}, must be dim3"
        # Store an immutable copy so that neither the default value nor a
        # sequence owned by the caller is shared between wrapper instances.
        self.op = tuple(op)
        self._dict_env_space = isinstance(env.observation_space, spaces.Dict)

        img_space: spaces.Box = (
            env.observation_space.spaces["image"]
            if self._dict_env_space
            else env.observation_space
        )

        # The bounds are read from element [0, 0, 0] only, so the new space
        # uses a single scalar low/high for every entry.
        self.observation_space = gym.spaces.Box(
            low=img_space.low[0, 0, 0],
            high=img_space.high[0, 0, 0],
            shape=[img_space.shape[axis] for axis in self.op],
            dtype=img_space.dtype,
        )

    def observation(self, observation):
        """Return the (image part of the) observation with permuted axes."""
        if self._dict_env_space:
            observation = observation["image"]
        return observation.transpose(*self.op)


class TimeLimit(gym.Wrapper):
def __init__(self, env, max_episode_steps=None):
super().__init__(env)
Expand All @@ -26,36 +57,6 @@ def reset(self, **kwargs):
return self.env.reset(**kwargs)


class TransposeObs(gym.ObservationWrapper):
    """Transpose observation space (base class)."""

    def __init__(self, env=None):
        """Initialise the underlying observation wrapper with ``env``."""
        gym.ObservationWrapper.__init__(self, env)


class TransposeImage(TransposeObs):
    """Observation wrapper that permutes the axes of image observations."""

    def __init__(self, env=None, op=(2, 0, 1)):
        """
        Transpose observation space for images

        Args:
            env: environment to wrap
            op: three axis indices; output axis ``i`` is taken from
                input axis ``op[i]``
        """
        super().__init__(env)
        assert len(op) == 3, f"Error: Operation, {str(op)}, must be dim3"
        # Store an immutable copy so that neither the default value nor a
        # sequence owned by the caller is shared between wrapper instances.
        self.op = tuple(op)

        source_space = self.observation_space
        # The bounds are read from element [0, 0, 0] only, so the new space
        # uses a single scalar low/high for every entry.
        self.observation_space = gym.spaces.Box(
            source_space.low[0, 0, 0],
            source_space.high[0, 0, 0],
            [source_space.shape[axis] for axis in self.op],
            dtype=source_space.dtype,
        )

    def observation(self, ob):
        """Return ``ob`` with its axes permuted according to ``self.op``."""
        return ob.transpose(*self.op)


class NoopResetEnv(gym.Wrapper):
def __init__(self, env, noop_max=30):
"""
Expand Down Expand Up @@ -149,39 +150,6 @@ def reset(self, **kwargs):
return obs


class MaxAndSkipEnv(gym.Wrapper):
    """Repeat each action ``skip`` times and max-pool the last two frames."""

    def __init__(self, env, skip=4):
        """Return only every `skip`-th frame"""
        super().__init__(env)
        frame_shape = env.observation_space.shape
        # Two-slot buffer holding the most recent raw observations
        # (used for max pooling across time steps).
        self._obs_buffer = np.zeros((2, ) + frame_shape, dtype=np.uint8)
        self._skip = skip

    def step(self, action):
        """Repeat action, sum reward, and max over last observations."""
        total_reward = 0.0
        done = None
        # Index of the first of the two frames that get buffered.
        first_kept = self._skip - 2
        for frame_idx in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            slot = frame_idx - first_kept
            if 0 <= slot <= 1:
                self._obs_buffer[slot] = obs
            total_reward += reward
            if done:
                break
        # Note that the observation on the done=True frame doesn't matter.
        return self._obs_buffer.max(axis=0), total_reward, done, info

    def reset(self, **kwargs):
        """Reset the wrapped env, forwarding all keyword arguments."""
        return self.env.reset(**kwargs)


class ClipRewardEnv(gym.RewardWrapper):
def __init__(self, env):
gym.RewardWrapper.__init__(self, env)
Expand All @@ -192,7 +160,7 @@ def reward(self, reward):


class WarpFrame(gym.ObservationWrapper):
def __init__(self, env, width=84, height=84, grayscale=True):
def __init__(self, env, height=84, width=84, grayscale=True):
"""Warp frames to 84x84 as done in the Nature paper and later work."""
gym.ObservationWrapper.__init__(self, env)
self.width = width
Expand Down Expand Up @@ -224,116 +192,9 @@ def observation(self, frame):
return frame


class FrameStack(gym.Wrapper):
    """Wrapper whose observation is the last ``k`` frames stacked together."""

    def __init__(self, env, k):
        """Stack k last frames.

        Returns lazy array, which is much more memory efficient.

        The observation space keeps the env's leading dimensions and
        multiplies the last one by ``k``.

        See Also
        --------
        baselines.common.atari_wrappers.LazyFrames
        """
        gym.Wrapper.__init__(self, env)
        self.k = k
        # Bounded queue: appending a new frame drops the oldest one.
        self.frames = deque([], maxlen=k)
        shp = env.observation_space.shape
        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=(shp[:-1] + (shp[-1] * k, )),
            dtype=env.observation_space.dtype
        )

    def reset(self, **kwargs):
        """Reset the env and fill the whole stack with the first frame.

        Keyword arguments are forwarded to the wrapped env's ``reset``,
        matching the other wrappers in this module.
        """
        ob = self.env.reset(**kwargs)
        for _ in range(self.k):
            self.frames.append(ob)
        return self._get_ob()

    def step(self, action):
        """Step the env, push the new frame and return the stacked frames."""
        ob, reward, done, info = self.env.step(action)
        self.frames.append(ob)
        return self._get_ob(), reward, done, info

    def _get_ob(self):
        """Wrap the currently buffered frames into a ``LazyFrames``."""
        assert len(self.frames) == self.k
        return LazyFrames(list(self.frames))


class ScaledFloatFrame(gym.ObservationWrapper):
    """Observation wrapper returning float32 observations divided by 255."""

    def __init__(self, env):
        super().__init__(env)
        unit_box = gym.spaces.Box(
            low=0,
            high=1,
            shape=env.observation_space.shape,
            dtype=np.float32,
        )
        self.observation_space = unit_box

    def observation(self, observation):
        """Convert ``observation`` to a float32 array and divide by 255."""
        # careful! This undoes the memory optimization, use
        # with smaller replay buffers only.
        as_float32 = np.array(observation).astype(np.float32)
        return as_float32 / 255.0


class LazyFrames(object):
    """Lazily concatenated stack of frames.

    This object ensures that common frames between the observations are
    only stored once. It exists purely to optimize memory usage, which can
    be huge for DQN's 1M frames replay buffers. This object should only be
    converted to numpy array before being passed to the model.
    """

    def __init__(self, frames):
        """Keep a reference to ``frames``; nothing is concatenated yet."""
        self._frames = frames
        self._out = None

    def _force(self):
        """Concatenate the frames along the last axis once and cache it."""
        if self._out is None:
            self._out = np.concatenate(self._frames, axis=-1)
            # The individual frames are no longer needed once merged.
            self._frames = None
        return self._out

    def __array__(self, dtype=None, copy=None):
        """Return the concatenated frames as a numpy array.

        Args:
            dtype: optional target dtype; when given, the result is a
                converted copy of the cached array
            copy: ``copy`` keyword of the NumPy ``__array__`` protocol
                (passed by NumPy 2); ``None`` keeps the historical
                behaviour, ``True`` forces a copy, ``False`` forbids one

        Raises:
            ValueError: if ``copy=False`` but a dtype conversion is needed
        """
        out = self._force()
        if dtype is not None:
            if copy is False:
                if np.dtype(dtype) != out.dtype:
                    raise ValueError(
                        "Unable to avoid copy while converting "
                        "LazyFrames to the requested dtype."
                    )
                return out
            return out.astype(dtype)
        if copy:
            return out.copy()
        return out

    def __len__(self):
        # NOTE: this is the length of the FIRST axis of the concatenated
        # array, whereas __getitem__ indexes the LAST axis.
        return len(self._force())

    def __getitem__(self, i):
        """Return slice ``i`` along the last axis of the stacked array."""
        return self._force()[..., i]


def make_atari(env_id, max_episode_steps=None):
def make_image_env(env_id):
env = gym.make(env_id)
assert "NoFrameskip" in env.spec.id
env = NoopResetEnv(env, noop_max=30)
# env = MaxAndSkipEnv(env, skip=4)
if max_episode_steps is not None:
env = TimeLimit(env, max_episode_steps=max_episode_steps)
return env


def wrap_deepmind(
    env, episode_life=True, clip_rewards=True, frame_stack=False, scale=False
):
    """Configure environment for DeepMind-style Atari.

    Wrappers are applied in a fixed order: optional episodic life, fire
    reset (only when the env has a "FIRE" action), frame warping, optional
    float scaling, optional reward clipping, optional stacking of 4 frames,
    and finally an image transpose with ``op=[2, 0, 1]``.
    """
    if episode_life:
        env = EpisodicLifeEnv(env)

    has_fire_action = "FIRE" in env.unwrapped.get_action_meanings()
    if has_fire_action:
        env = FireResetEnv(env)

    env = WarpFrame(env)

    # (flag, wrapper factory) pairs, applied in this exact order.
    optional_stages = (
        (scale, ScaledFloatFrame),
        (clip_rewards, ClipRewardEnv),
        (frame_stack, lambda wrapped: FrameStack(wrapped, 4)),
    )
    for enabled, wrap in optional_stages:
        if enabled:
            env = wrap(env)

    return TransposeImage(env, op=[2, 0, 1])


Expand All @@ -342,26 +203,22 @@ def make_atari_env(
max_episode_steps=None,
episode_life=True,
clip_rewards=False,
frame_stack=False,
scale=False
width=84,
height=84,
grayscale=True,
):
env = gym.make(env_id)
assert "NoFrameskip" in env.spec.id
env = NoopResetEnv(env, noop_max=30)
# env = MaxAndSkipEnv(env, skip=4)
if max_episode_steps is not None:
env = TimeLimit(env, max_episode_steps=max_episode_steps)
if episode_life:
env = EpisodicLifeEnv(env)
if "FIRE" in env.unwrapped.get_action_meanings():
env = FireResetEnv(env)
env = WarpFrame(env)
if scale:
env = ScaledFloatFrame(env)
env = WarpFrame(env, height=height, width=width, grayscale=grayscale)
if clip_rewards:
env = ClipRewardEnv(env)
if frame_stack:
env = FrameStack(env, 4)

env = TransposeImage(env, op=[2, 0, 1])
return env

0 comments on commit c23df02

Please sign in to comment.