In [1]:
import gym
from gym import wrappers
import numpy as np
import io, base64

In [2]:
# Count how many environment specs are registered with gym.
registered_specs = gym.envs.registry.all()
print(len(registered_specs))

859


# Basics

In [None]:
# Minimal life cycle of an environment: make -> reset -> render -> step -> close.
env = gym.make('MountainCar-v0') # any registered id can be used instead, e.g. 'SpaceInvaders-v0'
# NOTE(review): `_max_episode_steps` is a private attribute — presumably owned by the
# time-limit wrapper that gym.make adds; confirm it exists for the env you create.
env._max_episode_steps = 200   # number of steps after which the episode is terminated (default 200)
initstate = env.reset()      # start a new episode; returns the initial state

env.render() # draw the current frame on screen
screen = env.render(mode='rgb_array')          # same frame returned as a numpy array (height, width, RGB)
observation, reward, done, info = env.step(1)  # apply action 1 for one time step
env.close()                                                # close the rendered graphics

In [5]:
# Inspect the observation (state) space of the environment.
obs_space = env.observation_space
print(initstate.size)
print(obs_space)
print(isinstance(obs_space, gym.spaces.Discrete))  # are the states discrete?
print(obs_space.low, obs_space.high)               # lower / upper bound of each state component
obs_space.shape

2
Box(2,)
False
[-1.2  -0.07] [0.6  0.07]


(2,)

In [4]:
# Inspect the action space of the environment.
act_space = env.action_space
print(act_space)
print(isinstance(act_space, gym.spaces.Discrete))  # are the allowed actions discrete?
print(act_space.n)                                 # number of possible actions
action = act_space.sample()                        # draw a random action

Discrete(3)
True
3


## [Environment](http://gym.openai.com/envs/)-specific attributes

In [None]:
# MountainCar-v0 only: attribute specific to this environment
# (other environments expose different attributes).
env.goal_position

# Wrapper

In [None]:
# Record episodes to video. The wrapped env still has all its usual methods.
# One directory variable is used for both recording and reading back: the original
# recorded to "~/gym-results" but read from "./gym-results", which are different paths
# ("~" is only expanded by an explicit os.path.expanduser call).
video_dir = './gym-results'
env = wrappers.Monitor(env, video_dir, force=True)

# NOTE(review): presumably at least one episode must be run on the wrapped env
# (reset/step ... close) before this video file exists — confirm.
video_path = '%s/openaigym.video.%s.video000000.mp4' % (video_dir, env.file_infix)
with open(video_path, 'rb') as video_file:   # read-only, and closed after reading
    video = video_file.read()
encoded = base64.b64encode(video)

# NOTE(review): `HTML` is not imported anywhere in the visible notebook; it requires
# `from IPython.display import HTML` in the imports cell.
HTML(data='''
    <video width="360" height="auto" alt="test" controls><source src="data:video/mp4;base64,{0}" type="video/mp4" /></video>'''
.format(encoded.decode('ascii')))

In [13]:
# Undo the Monitor wrapping from the previous cell.
# NOTE(review): `unwrapped` presumably returns the innermost environment, which would
# also drop any other wrapper applied earlier — confirm this is intended.
env = env.unwrapped  # undo the above

True

# Custom environment

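A custom environment subclasses `gym.Env` and must provide `reset`, `step` and `render` methods, as well as `action_space` and `observation_space` attributes built from `gym.spaces`.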

In [13]:
#### action_space and observation_space must be built from one of the gym.spaces classes below ###
# Uncomment a line to print the docstring of that space class.
# print(gym.spaces.Box.__doc__)
# print(gym.spaces.Dict.__doc__)
# print(gym.spaces.Discrete.__doc__)
print(gym.spaces.MultiBinary.__doc__)  # prints None when the class has no docstring
# print(gym.spaces.MultiDiscrete.__doc__)
# print(gym.spaces.Space.__doc__)
# print(gym.spaces.Tuple.__doc__)

None


In [26]:
import numpy as np
class Catch(gym.Env):
    """Minimal "catch the fruit" grid game exposed as a gym environment.

    One episode is one fruit falling from the top row to the bottom row of a
    ``grid_size`` x ``grid_size`` grid, one row per step. The agent slides a
    basket of width ``basket_size`` along the bottom row and is rewarded if
    the fruit lands inside it.

    Actions (``Discrete(3)``): 0 = move left, 1 = stay, anything else = move right.
    Observation: the grid flattened to length ``grid_size * grid_size``; cells
    holding the fruit or the basket are 1, all others 0.
    Internal state: ``[fruit_row, fruit_col, basket_left]``.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self, grid_size=10, basket_size=1):
        """Set up the spaces and remember the grid / basket dimensions."""
        super(Catch, self).__init__()

        self.grid_size = grid_size
        self.basket_size = basket_size

        self.action_space = gym.spaces.Discrete(3)
        self.observation_space = gym.spaces.MultiBinary(grid_size * grid_size)  # each pixel is 0 or 1

    def reset(self):
        """Start a new episode and return the initial observation.

        The fruit starts in the top row at a random column; the basket starts
        at a random position that keeps it entirely inside the grid.
        """
        # np.random.randint excludes the upper bound, so the bounds below are
        # "last valid value + 1" (the original stopped one short on both).
        fruit_col = np.random.randint(0, self.grid_size)
        basket_left = np.random.randint(0, self.grid_size - self.basket_size + 1)
        self.state = np.asarray([0, fruit_col, basket_left])  # [fruit_row, fruit_col, basket's left end]
        return self._observe()

    def step(self, action):
        """Advance one time step: move the basket, drop the fruit one row.

        Returns:
            (observation, reward, done, info) where ``info`` is an empty dict.
        """
        if action == 0:
            shift = -1   # move left
        elif action == 1:
            shift = 0    # stay
        else:
            shift = 1    # move right

        fruit_row, fruit_col, basket_left = self.state
        rightmost_left = self.grid_size - self.basket_size
        basket_left = min(max(0, basket_left + shift), rightmost_left)  # keep basket inside the grid
        self.state = np.asarray([fruit_row + 1, fruit_col, basket_left])

        # info is a dict (not None) so wrappers that write keys into it keep working.
        return self._observe(), self._get_reward(), self._is_over(), {}

    def render(self, mode='human', close=False):
        """Print the raw state ``[fruit_row, fruit_col, basket_left]``."""
        print(self.state)

    #############################helper methods---not required by gym.env##################################
    def _get_reward(self):
        """Return 1.0 if the fruit is on the bottom row inside the basket, else 0.0."""
        fruit_row, fruit_col, basket_left = self.state
        on_bottom = fruit_row == self.grid_size - 1
        in_basket = basket_left <= fruit_col < basket_left + self.basket_size
        return 1. if on_bottom and in_basket else 0.

    def _is_over(self):
        """The episode ends once the fruit has reached the bottom row."""
        return (self.state[0] == self.grid_size - 1)

    def _observe(self):
        """Draw the fruit and the basket on an empty grid and return it flattened."""
        fruit_row, fruit_col, basket_left = self.state
        canvas = np.zeros((self.grid_size, self.grid_size))
        canvas[fruit_row, fruit_col] = 1                             # draw fruit
        # Basket covers exactly basket_size cells, matching the catch test in _get_reward.
        canvas[-1, basket_left:basket_left + self.basket_size] = 1   # draw basket
        return canvas.flatten()