For this introduction to OpenAI Gym, I am following [this](https://blog.paperspace.com/getting-started-with-openai-gym/) article  

2. Atari Breakout

In [1]:
import gym
import time

In [7]:
env = gym.make('BreakoutNoFrameskip-v4')

obs_space = env.observation_space
action_space = env.action_space

num_steps = 1000

# reset env
env.reset()
    
for i in range(num_steps):
    # pick a random action
    action = action_space.sample()
    
    # step
    new_obs, reward, done, info = env.step(action)
    env.render()
    
    # wait a little before next frame
    time.sleep(0.001)
    
    # # if done, reset
    # if done:
    #     env.reset()
        
# close rendering window
env.close()

### Using a wrapper to concatenate observations

In [8]:
from collections import deque
from gym import spaces
import numpy as np

class ConcatObs(gym.Wrapper):
    def __init__(self, env, k):
        gym.Wrapper.__init__(self, env)
        self.k = k
        self.frames = deque([], maxlen=k)
        shp = env.observation_space.shape
        self.observation_space = \
            spaces.Box(low=0, high=255, shape=((k,) + shp), dtype=env.observation_space.dtype)


def reset(self):
    ob = self.env.reset()
    for _ in range(self.k):
        self.frames.append(ob)
    return self._get_ob()

def step(self, action):
    ob, reward, done, info = self.env.step(action)
    self.frames.append(ob)
    return self._get_ob(), reward, done, info

def _get_ob(self):
    return np.array(self.frames)

Use the wrapper class

In [9]:
env = gym.make("BreakoutNoFrameskip-v4")
wrapped_env = ConcatObs(env, 4)
print("The new observation space is", wrapped_env.observation_space)

The new observation space is Box([[[[0 0 0]
   [0 0 0]
   [0 0 0]
   ...
   [0 0 0]
   [0 0 0]
   [0 0 0]]

  [[0 0 0]
   [0 0 0]
   [0 0 0]
   ...
   [0 0 0]
   [0 0 0]
   [0 0 0]]

  [[0 0 0]
   [0 0 0]
   [0 0 0]
   ...
   [0 0 0]
   [0 0 0]
   [0 0 0]]

  ...

  [[0 0 0]
   [0 0 0]
   [0 0 0]
   ...
   [0 0 0]
   [0 0 0]
   [0 0 0]]

  [[0 0 0]
   [0 0 0]
   [0 0 0]
   ...
   [0 0 0]
   [0 0 0]
   [0 0 0]]

  [[0 0 0]
   [0 0 0]
   [0 0 0]
   ...
   [0 0 0]
   [0 0 0]
   [0 0 0]]]


 [[[0 0 0]
   [0 0 0]
   [0 0 0]
   ...
   [0 0 0]
   [0 0 0]
   [0 0 0]]

  [[0 0 0]
   [0 0 0]
   [0 0 0]
   ...
   [0 0 0]
   [0 0 0]
   [0 0 0]]

  [[0 0 0]
   [0 0 0]
   [0 0 0]
   ...
   [0 0 0]
   [0 0 0]
   [0 0 0]]

  ...

  [[0 0 0]
   [0 0 0]
   [0 0 0]
   ...
   [0 0 0]
   [0 0 0]
   [0 0 0]]

  [[0 0 0]
   [0 0 0]
   [0 0 0]
   ...
   [0 0 0]
   [0 0 0]
   [0 0 0]]

  [[0 0 0]
   [0 0 0]
   [0 0 0]
   ...
   [0 0 0]
   [0 0 0]
   [0 0 0]]]


 [[[0 0 0]
   [0 0 0]
   [0 0 0]
   ...
   [0 0 0

Verify that the frames are concatenated:

In [10]:
# Reset the Env
obs = wrapped_env.reset()
print("Intial obs is of the shape", obs.shape)

# Take one step
obs, _, _, _  = wrapped_env.step(2)
print("Obs after taking a step is", obs.shape)

Intial obs is of the shape (210, 160, 3)
Obs after taking a step is (210, 160, 3)


### Other wrapper examples
Below are wrappers to:
- normalize the pixel observations by 255.
- clip the rewards between 0 and 1.
- prevent the slider from moving to the left (action 3)

In [20]:
import random

class ObsWrapper(gym.ObservationWrapper):
    def __init__(self, env):
        super().__init__(env)
        
    def observation(self, obs):
        # Normalize observation by 255
        return obs / 255.0
    
class RewardWrapper(gym.RewardWrapper):
    def __init__(self, env):
        super().__init__(env)
        
    def reward(self, reward):
        # Clip reward between 0 to 1
        return np.clip(reward, 0, 1)
    
class ActionWrapper(gym.ActionWrapper):
    def __init__(self, env):
        super().__init__(env)
        
    def action(self, action):
        if action == 3:
            return random.choice([0,1,2])
        
        else: return action

In [21]:
env = gym.make("BreakoutNoFrameskip-v4")

Apply all these wrappers to an env in one line of code

In [22]:
wrapped_env = ActionWrapper(RewardWrapper(ObsWrapper(env)))
# wrapped_env = ObsWrapper(RewardWrapper(ActionWrapper(env)))

Test that the changes have been effected:

In [23]:
timesteps = 1000

# initialize the environment
obs = wrapped_env.reset()

for i in range(timesteps):
    # select a random action
    action = wrapped_env.action_space.sample()
    
    # step
    obs, reward, done, info = wrapped_env.step(action)
    
    # Raise a flag if values have not been vectorised properly
    if (0 > obs).any() or (obs > 1).any():
        print('Something may be wrong. Observed values not normalized')
        
    # Raise a flag if reward out of bounds
    if 0 > reward > 1:
        print('Something may be wrong. Reward not within range')
        
    # Use your korokoro eyes to observe if the agent moves left
    wrapped_env.render()
    
    # wait a bit
    time.sleep(0.001)
    
    # if done, reset
    if done:
        wrapped_env.reset()
        
wrapped_env.close()

print('all checks passed')

all checks passed


Unwrapping an env

In [25]:
print(f'{wrapped_env = }')
print(f'{wrapped_env.unwrapped = }')

wrapped_env = <ActionWrapper<RewardWrapper<ObsWrapper<TimeLimit<AtariEnv<BreakoutNoFrameskip-v4>>>>>>
wrapped_env.unwrapped = <gym.envs.atari.atari_env.AtariEnv object at 0x000001C8A7E2B940>


In [30]:
print(f'{wrapped_env.unwrapped.get_keys_to_action()}')
print(f'{wrapped_env.unwrapped.get_action_meanings()}')

{(): 0, (32,): 1, (100,): 2, (97,): 3}
['NOOP', 'FIRE', 'RIGHT', 'LEFT']


### Vectorized environments with the `baselines` library

In [31]:
# install baselines
!git clone https://github.com/openai/baselines
!cd baselines
!pip install .

Cloning into 'baselines'...
ERROR: Directory '.' is not installable. Neither 'setup.py' nor 'pyproject.toml' found.


In [33]:
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

In [34]:
num_envs = 3
envs = [lambda : gym.make('BreakoutNoFrameskip-v4') for i in range(num_envs)]

# Vec env
envs = SubprocVecEnv(envs)

In [35]:
# get the initial state
init_obs = envs.reset()

# We get a list of observations corresponding to parallel environments 
print("Number of Envs:", len(init_obs))

# Check out of the obs 
one_obs = init_obs[0]
print("Shape of one Env:", one_obs.shape)

# prepare a list of actions and apply them to environment 
actions = [0, 1, 2]
obs = envs.step(actions)

Number of Envs: 3
Shape of one Env: (210, 160, 3)


In [36]:
# render the envs
import time 

# list of envs 
num_envs = 3
envs = [lambda: gym.make("BreakoutNoFrameskip-v4") for i in range(num_envs)]

# Vec Env 
envs = SubprocVecEnv(envs)

init_obs = envs.reset()

for i in range(1000):
    actions = [envs.action_space.sample() for i in range(num_envs)]
    envs.step(actions)
    envs.render()
    time.sleep(0.001)

envs.close()

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "C:\Users\akino\Anaconda3\envs\gym-atari\lib\site-packages\IPython\core\interactiveshell.py", line 3398, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\akino\AppData\Local\Temp\ipykernel_14060\119525547.py", line 14, in <cell line: 13>
    actions = [envs.action_space.sample() for i in range(num_envs)]
  File "C:\Users\akino\AppData\Local\Temp\ipykernel_14060\119525547.py", line 14, in <listcomp>
    actions = [envs.action_space.sample() for i in range(num_envs)]
  File "C:\Users\akino\Anaconda3\envs\gym-atari\lib\site-packages\gym\spaces\discrete.py", line 20, in sample
  File "C:\Users\akino\Anaconda3\envs\gym-atari\lib\site-packages\gym\spaces\space.py", line 31, in np_random
    raise NotImplementedError
AttributeError: 'Discrete' object has no attribute '_np_random'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\akino\Anaconda3\en