In [1]:
import gym
import random
import numpy as np
from keras.layers import Dense, Flatten
from keras.models import Sequential
from keras.optimizers import Adam

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Seed the Python and NumPy generators and the environment so that repeated
# runs of the notebook are more repeatable.
# NOTE(review): the Keras backend keeps its own random state, so training
# results may still differ between runs.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

env = gym.make('CartPole-v1')
env.seed(SEED)

In [3]:
# Length of the first axis of the observation space's shape.
states = env.observation_space.shape[0]
print('States', states)

States 4


In [4]:
# Number of actions reported by the environment's action space.
actions = env.action_space.n
print('Actions', actions)

Actions 2


In [5]:
# Baseline: play a few episodes with uniformly random actions and print the
# total reward collected in each one.
episodes = 10
# Read the number of actions from the environment instead of hard-coding it.
n_actions = int(env.action_space.n)

for episode in range(1, episodes + 1):
    # Start every episode from a fresh initial state. The observation itself
    # is not needed here because actions are chosen at random.
    env.reset()
    done = False
    score = 0
    # Keep stepping until the environment reports the end of the episode.
    while not done:
        # Draw the current frame.
        env.render()
        # Pick an action uniformly at random.
        action = random.randrange(n_actions)
        # Apply the action; only the reward and the termination flag are used.
        _next_state, reward, done, _info = env.step(action)
        score += reward
    print('episode {} score {}'.format(episode, score))

episode 1 score 35.0
episode 2 score 15.0
episode 3 score 30.0
episode 4 score 51.0
episode 5 score 12.0
episode 6 score 15.0
episode 7 score 14.0
episode 8 score 25.0
episode 9 score 10.0
episode 10 score 35.0


In [6]:
def agent(states, actions, hidden_units=24, hidden_layers=3):
    """Build the feed-forward network that estimates one Q-value per action.

    Args:
        states: Number of features in a single observation.
        actions: Number of actions; the output layer has one linear unit
            for each of them.
        hidden_units: Width of every hidden ReLU layer. Defaults to 24.
        hidden_layers: Number of hidden ReLU layers. Defaults to 3.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    model = Sequential()
    # The input carries a leading axis of length 1 that is flattened away.
    # NOTE(review): presumably this is keras-rl's observation window axis
    # (window_length = 1) - confirm against the agent's documentation.
    model.add(Flatten(input_shape=(1, states)))
    for _layer in range(hidden_layers):
        model.add(Dense(hidden_units, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model


model = agent(env.observation_space.shape[0], env.action_space.n)

In [7]:
from rl.agents import SARSAAgent
from rl.policy import EpsGreedyQPolicy

# Epsilon-greedy exploration policy, constructed with the library's default settings.
policy = EpsGreedyQPolicy()

In [8]:
# Combine the Q-network and the exploration policy into a SARSA agent.
sarsa = SARSAAgent(model = model, policy = policy, nb_actions = env.action_space.n)

In [9]:
# Compile the agent with the 'adam' optimizer and track mean squared error as a metric.
sarsa.compile('adam', metrics = ['mse'])

In [10]:
# Train on the environment for 50000 steps with rendering switched off.
sarsa.fit(env, nb_steps = 50000, visualize = False, verbose = 1)

Training for 50000 steps ...
Interval 1 (0 steps performed)
258 episodes - episode_reward: 37.837 [8.000, 275.000] - loss: 8.962 - mean_squared_error: 521.801 - mean_q: 29.880

Interval 2 (10000 steps performed)
100 episodes - episode_reward: 100.780 [11.000, 500.000] - loss: 9.691 - mean_squared_error: 1403.165 - mean_q: 51.363

Interval 3 (20000 steps performed)
72 episodes - episode_reward: 139.972 [11.000, 500.000] - loss: 3.466 - mean_squared_error: 1458.694 - mean_q: 50.491

Interval 4 (30000 steps performed)
74 episodes - episode_reward: 133.919 [10.000, 434.000] - loss: 2.160 - mean_squared_error: 1464.023 - mean_q: 49.749

Interval 5 (40000 steps performed)
done, took 253.844 seconds


<keras.callbacks.History at 0xb2ea9e4a8>

In [11]:
# Evaluate the trained agent without rendering and report the mean episode reward.
n_test_episodes = 100
scores = sarsa.test(env, nb_episodes=n_test_episodes, visualize=False)
mean_reward = np.mean(scores.history['episode_reward'])
print('Average score over {} test games:{}'.format(n_test_episodes, mean_reward))

Testing for 100 episodes ...
Episode 1: reward: 500.000, steps: 500
Episode 2: reward: 500.000, steps: 500
Episode 3: reward: 500.000, steps: 500
Episode 4: reward: 500.000, steps: 500
Episode 5: reward: 500.000, steps: 500
Episode 6: reward: 500.000, steps: 500
Episode 7: reward: 500.000, steps: 500
Episode 8: reward: 500.000, steps: 500
Episode 9: reward: 500.000, steps: 500
Episode 10: reward: 500.000, steps: 500
Episode 11: reward: 500.000, steps: 500
Episode 12: reward: 500.000, steps: 500
Episode 13: reward: 500.000, steps: 500
Episode 14: reward: 500.000, steps: 500
Episode 15: reward: 500.000, steps: 500
Episode 16: reward: 500.000, steps: 500
Episode 17: reward: 500.000, steps: 500
Episode 18: reward: 500.000, steps: 500
Episode 19: reward: 500.000, steps: 500
Episode 20: reward: 500.000, steps: 500
Episode 21: reward: 500.000, steps: 500
Episode 22: reward: 500.000, steps: 500
Episode 23: reward: 500.000, steps: 500
Episode 24: reward: 500.000, steps: 500
Episode 25: reward: 

In [12]:
# Persist the trained weights; overwrite=True replaces an existing file of the same name.
sarsa.save_weights('sarsa_weights.h5f', overwrite=True)

In [None]:
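# Optional: to reuse the weights saved above instead of retraining,
# uncomment the following line.
# sarsa.load_weights('sarsa_weights.h5f')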

In [15]:
# Run two rendered test episodes to watch the trained agent; the returned history is discarded.
_ = sarsa.test(env, nb_episodes = 2, visualize= True)

Testing for 2 episodes ...
Episode 1: reward: 500.000, steps: 500
Episode 2: reward: 500.000, steps: 500
