# Random CartPole Agent

Sample a random action and ask the environment to execute it; the environment returns the next observation, the reward, and a done flag.

Exit when the episode is complete and print the execution statistics.

In [28]:
import gym
import random

In [18]:
def run(env):
    """Play one episode with uniformly sampled actions and print a summary.

    The environment is stepped until it reports ``done``. The caller is
    expected to have reset ``env`` beforehand; this function never resets it.

    Args:
        env: Object exposing ``action_space.sample()`` and ``step(action)``,
            where ``step`` returns a 4-tuple whose second item is the reward
            and whose third item is the done flag.

    Returns:
        None. The step count and accumulated reward are printed to stdout.
    """
    reward_sum = 0.0
    step_count = 0
    while True:
        # The observation and the info item are not needed by a random policy.
        _, reward, done, _ = env.step(env.action_space.sample())
        reward_sum += reward
        step_count += 1
        if done:
            break
    print("Episode done in %d steps, total reward %.2f" % (step_count, reward_sum))

In [24]:
# Play a fixed number of random-agent episodes on CartPole.
episodes = 10
env = gym.make('CartPole-v0')
for episode in range(episodes):
    # Each episode starts from a fresh reset; run() itself never resets.
    obs = env.reset()
    run(env)

Episode done in 21 steps, total reward 21.00
Episode done in 13 steps, total reward 13.00
Episode done in 26 steps, total reward 26.00
Episode done in 11 steps, total reward 11.00
Episode done in 14 steps, total reward 14.00
Episode done in 15 steps, total reward 15.00
Episode done in 34 steps, total reward 34.00
Episode done in 13 steps, total reward 13.00
Episode done in 16 steps, total reward 16.00
Episode done in 29 steps, total reward 29.00


## Random Action Wrapper

Define a probability $\epsilon$ with which the agent-specified action is replaced by a random action before it is executed against the environment.

In [25]:
class RandomActionWrapper(gym.ActionWrapper):
    """Action wrapper that occasionally overrides the requested action.

    With probability ``epsilon`` the action passed in is discarded and one
    sampled from the wrapped environment's action space is used instead.
    """

    def __init__(self, env, epsilon=0.1):
        """Wrap ``env`` and remember the replacement probability ``epsilon``."""
        super(RandomActionWrapper, self).__init__(env)
        self.epsilon = epsilon

    def action(self, action):
        """Return ``action`` unchanged, or a sampled action on an epsilon draw.

        Prints "Random!" whenever the requested action is replaced.
        """
        replace = random.random() < self.epsilon
        if not replace:
            return action
        print("Random!")
        return self.env.action_space.sample()

In [29]:
# Demonstrate the wrapper: the agent always requests action 0, and the
# wrapper swaps in a sampled action on an epsilon draw (it prints "Random!").
env = RandomActionWrapper(gym.make("CartPole-v0"))
obs = env.reset()
total_reward = 0.0
while True:
    obs, reward, done, _ = env.step(0)
    total_reward += reward
    if done:
        break
    # Running total after every non-terminal step; the terminal step breaks
    # out above before anything is printed.
    print("Reward got: %.2f" % total_reward)

Reward got: 1.00
Reward got: 2.00
Reward got: 3.00
Reward got: 4.00
Random!
Reward got: 5.00
Reward got: 6.00
Reward got: 7.00
Reward got: 8.00


## Adding a Monitor

In [34]:
# Wrap CartPole in a Monitor that writes into the "recording" directory.
# NOTE(review): force=True presumably lets the Monitor reuse a directory that
# already holds earlier recordings — confirm against the installed gym version.
env = gym.make('CartPole-v0')
env = gym.wrappers.Monitor(env, "recording", force=True)

obs = env.reset()
total_steps = 0
total_reward = 0.0

# Step with random actions until the environment reports the episode is over.
done = False
while not done:
    action = env.action_space.sample()
    obs, reward, done, _ = env.step(action)
    total_reward += reward
    total_steps += 1

print("Episode done in %d steps, total reward %.2f" % (total_steps, total_reward))
# Close the Monitor wrapper, then the environment it wraps (reached via .env).
env.close()
env.env.close()

Episode done in 27 steps, total reward 27.00
