In [1]:
import numpy as np
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory

# Train a cross-entropy-method (CEM) agent on the loon environment, save the
# learned weights, then play a few evaluation games.

# config
# import loonminator game environment
# (presumably this import is what registers 'loon-v0' with gym -- keep it
# ahead of gym.make)
import gym_loon
ENV_NAME = 'loon-v0'
# used both as the log interval (in steps) and as the agent's train_interval
intervals = 100
# how long to train for
nb_steps = 10000
batch_size = 500
# how many nodes per hidden layer
hidden_size = 200
# how many hidden Dense+ReLU layers the policy network stacks
nb_hidden_layers = 6
# render not yet implemented
visual = False

# automatic config
env = gym.make(ENV_NAME)
# fix both RNGs so runs are repeatable
np.random.seed(1)
env.seed(1)
# define actions to take
nb_actions = env.action_space.n
# not used below; kept because other notebook cells may read it
obs_dim = env.observation_space.shape[0]

# create network
# The leading (1,) axis in the input shape matches window_length=1 of the
# memory created below; Flatten collapses it before the Dense stack.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
for _ in range(nb_hidden_layers):
    model.add(Dense(hidden_size))
    model.add(Activation('relu'))
# one softmax output per discrete action
model.add(Dense(nb_actions))
model.add(Activation('softmax'))
# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" line).
model.summary()

# compile agent
memory = EpisodeParameterMemory(limit=1000, window_length=1)
# warm up for 2% of the training steps; integer division keeps the step
# count an int instead of a float.
# NOTE(review): keras-rl's CEMAgent appears to count train_interval in
# episodes, not steps -- confirm that reusing `intervals` here is intended.
cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
               batch_size=batch_size, nb_steps_warmup=nb_steps // 50,
               train_interval=intervals, elite_frac=0.2)
cem.compile()

# train
# increase verbose for more info
cem.fit(env, nb_steps=nb_steps, log_interval=intervals, visualize=False, verbose=1)

# save model after training
cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

# test for five games
cem.test(env, nb_episodes=5, visualize=visual)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 1)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 200)               400       
_________________________________________________________________
activation_1 (Activation)    (None, 200)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 200)               40200     
_________________________________________________________________
activation_2 (Activation)    (None, 200)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 200)               40200     
_________________________________________________________________
activation_3 (Activation)    (None, 200)               0         
__________

1 episodes - episode_reward: 113498.748 [113498.748, 113498.748]

Interval 35 (3400 steps performed)
1 episodes - episode_reward: 98872.734 [98872.734, 98872.734]

Interval 36 (3500 steps performed)
1 episodes - episode_reward: 114169.677 [114169.677, 114169.677]

Interval 37 (3600 steps performed)
1 episodes - episode_reward: 114736.538 [114736.538, 114736.538]

Interval 38 (3700 steps performed)
1 episodes - episode_reward: 105774.916 [105774.916, 105774.916]

Interval 39 (3800 steps performed)
1 episodes - episode_reward: 114105.494 [114105.494, 114105.494]

Interval 40 (3900 steps performed)
1 episodes - episode_reward: 111514.304 [111514.304, 111514.304]

Interval 41 (4000 steps performed)
1 episodes - episode_reward: 106480.976 [106480.976, 106480.976]

Interval 42 (4100 steps performed)
1 episodes - episode_reward: 111983.360 [111983.360, 111983.360]

Interval 43 (4200 steps performed)
1 episodes - episode_reward: 115972.927 [115972.927, 115972.927]

Interval 44 (4300 steps perf

1 episodes - episode_reward: 114960.565 [114960.565, 114960.565]

Interval 81 (8000 steps performed)
1 episodes - episode_reward: 114901.058 [114901.058, 114901.058]

Interval 82 (8100 steps performed)
1 episodes - episode_reward: 111689.077 [111689.077, 111689.077]

Interval 83 (8200 steps performed)
1 episodes - episode_reward: 114869.690 [114869.690, 114869.690]

Interval 84 (8300 steps performed)
1 episodes - episode_reward: 106567.294 [106567.294, 106567.294]

Interval 85 (8400 steps performed)
1 episodes - episode_reward: 115301.456 [115301.456, 115301.456]

Interval 86 (8500 steps performed)
1 episodes - episode_reward: 105520.447 [105520.447, 105520.447]

Interval 87 (8600 steps performed)
1 episodes - episode_reward: 99287.995 [99287.995, 99287.995]

Interval 88 (8700 steps performed)
1 episodes - episode_reward: 107779.765 [107779.765, 107779.765]

Interval 89 (8800 steps performed)
1 episodes - episode_reward: 102637.129 [102637.129, 102637.129]

Interval 90 (8900 steps perf

<keras.callbacks.History at 0x11f910630>