In [1]:
import random
import numpy as np
import tensorflow as tf
import gym

In [2]:
class RandomAgent():
    """Agent that ignores its observations and picks actions at random.

    The environment passed to the constructor must expose
    ``action_space.n``; that value is stored as ``action_size`` and actions
    are drawn from ``range(action_size)``.
    """

    def __init__(self, env):
        """Store the number of actions read from ``env.action_space.n`` and print it."""
        n_actions = env.action_space.n
        self.action_size = n_actions
        print('Action space size: ', n_actions)

    def get_action(self, observation):
        """Return a random action index; ``observation`` is accepted but not used."""
        candidates = range(self.action_size)
        return random.choice(candidates)

In [3]:
class GeneralRandomAgent():
    """Random agent that handles both discrete and non-discrete action spaces.

    When ``env.action_space`` is a ``gym.spaces.Discrete`` (or a subclass of
    it), actions are random integers in ``range(action_space.n)``. For any
    other space the agent reads the space's ``low``, ``high`` and ``shape``
    attributes and draws actions with ``np.random.uniform``.
    """

    def __init__(self, env):
        """Inspect ``env.action_space`` and cache what is needed for sampling.

        Args:
            env: Object exposing an ``action_space`` attribute. A discrete
                space must provide ``n``; any other space must provide
                ``low``, ``high`` and ``shape``.

        Raises:
            AttributeError: If the action space lacks the attributes required
                by the branch it falls into.
        """
        # isinstance instead of an exact type comparison, so subclasses of
        # Discrete are still treated as discrete rather than falling through
        # to the continuous branch.
        self.is_discrete = isinstance(env.action_space, gym.spaces.Discrete)

        if self.is_discrete:
            self.action_size = env.action_space.n
            print('Action space size: ', self.action_size)
        else:
            self.action_low = env.action_space.low
            self.action_high = env.action_space.high
            self.action_shape = env.action_space.shape
            print('Action range: ', self.action_low, self.action_high)

    def get_action(self, observation):
        """Return a random action; ``observation`` is accepted but not used.

        Returns:
            An integer index for discrete spaces, otherwise the result of
            ``np.random.uniform(action_low, action_high, action_shape)``.
        """
        if self.is_discrete:
            return random.choice(range(self.action_size))
        return np.random.uniform(self.action_low, self.action_high, self.action_shape)

In [4]:
class SimpleReflexAgent():
    """Rule-based agent whose action depends only on the sign of ``observation[2]``.

    The constructor reads ``env.action_space.n``, stores it as ``action_size``
    and prints it; the value is not used when choosing an action.
    """

    def __init__(self, env):
        """Store the number of actions read from ``env.action_space.n`` and print it."""
        n_actions = env.action_space.n
        self.action_size = n_actions
        print('Action space size: ', n_actions)

    def get_action(self, observation):
        """Return 0 when ``observation[2]`` is negative, otherwise 1."""
        # NOTE(review): index 2 is presumably the pole angle of a CartPole
        # observation, with 0/1 pushing the cart left/right — confirm against
        # the environment's documentation.
        tilt = observation[2]
        return 0 if tilt < 0 else 1

In [5]:
# Environment reference: http://gym.openai.com/envs/CartPole-v1/
# NOTE(review): the link above points at CartPole-v1, while the id passed to
# gym.make below is 'CartPole-v0' — confirm which version is intended.
env = gym.make('CartPole-v0')
# Show the observation and action spaces the environment reports.
print('Observation space: ', env.observation_space)
print('Action space: ', env.action_space)

Observation space:  Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)
Action space:  Discrete(2)


In [6]:
# Random agent: run rendered episodes and report how many steps each one lasted.
number_of_episodes = 15
number_of_steps = 200  # upper bound on steps taken within one episode
agent = GeneralRandomAgent(env)
try:
    for episode in range(number_of_episodes):
        # Every episode starts from its own reset, so no extra reset is
        # needed before the loop.
        observation = env.reset()
        for t in range(number_of_steps):
            env.render()
            action = agent.get_action(observation)
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} timesteps".format(t+1))
                break
finally:
    # Close the environment even if the loop raises or the cell is
    # interrupted, so whatever env.render() opened is released.
    env.close()

Action space size:  2
Episode finished after 70 timesteps
Episode finished after 44 timesteps
Episode finished after 19 timesteps
Episode finished after 30 timesteps
Episode finished after 13 timesteps
Episode finished after 35 timesteps
Episode finished after 49 timesteps
Episode finished after 9 timesteps
Episode finished after 18 timesteps
Episode finished after 36 timesteps
Episode finished after 16 timesteps
Episode finished after 26 timesteps
Episode finished after 37 timesteps
Episode finished after 13 timesteps
Episode finished after 17 timesteps


In [8]:
# Simple reflex agent: run rendered episodes and report how many steps each one lasted.
number_of_episodes = 5
number_of_steps = 200  # upper bound on steps taken within one episode
agent = SimpleReflexAgent(env)
try:
    for episode in range(number_of_episodes):
        # Every episode starts from its own reset, so no extra reset is
        # needed before the loop.
        observation = env.reset()
        for t in range(number_of_steps):
            env.render()
            action = agent.get_action(observation)
            observation, reward, done, info = env.step(action)
            if done:
                print("Episode finished after {} timesteps".format(t+1))
                break
finally:
    # Close the environment even if the loop raises or the cell is
    # interrupted, so whatever env.render() opened is released.
    env.close()

Action space size:  2
Episode finished after 52 timesteps
Episode finished after 52 timesteps
Episode finished after 49 timesteps
Episode finished after 45 timesteps
Episode finished after 48 timesteps
