In [1]:
import numpy as np
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory

# config
# import loonminator game environment
import gym_loon
# id handed to gym.make() to build the environment
ENV_NAME = 'loon-v0'

# training budget: total number of steps given to the agent's fit() call
nb_steps = 500000
# forwarded to the agent as train_interval
intervals = 100
# forwarded to the agent as batch_size
batch_size = 50

# width (number of units) of every hidden Dense layer
hidden_size = 200

# rendering is not implemented yet, so leave this off;
# only the final test run reads this flag
visual = False

# derived setup: everything below is read from the environment itself
env = gym.make(ENV_NAME)

# seed numpy's global RNG and the environment with the same fixed value
np.random.seed(1)
env.seed(1)

action_space, observation_space = env.action_space, env.observation_space
# number of actions the agent can choose from (the action space's `n`)
nb_actions = action_space.n
# first entry of the observation shape
obs_dim = observation_space.shape[0]

# create network
# Policy network: flattened observation -> three ReLU hidden layers of
# `hidden_size` units -> softmax over the `nb_actions` actions.
model = Sequential([
    # the leading 1 in the input shape presumably matches window_length=1
    # of the agent's memory -- TODO confirm
    Flatten(input_shape=(1,) + env.observation_space.shape),
    Dense(hidden_size),
    Activation('relu'),
    Dense(hidden_size),
    Activation('relu'),
    Dense(hidden_size),
    Activation('relu'),
    Dense(nb_actions),
    Activation('softmax'),
])
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
model.summary()

# build and compile the CEM agent around the network
memory = EpisodeParameterMemory(limit=1000, window_length=1)
cem = CEMAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    batch_size=batch_size,
    # warm-up length is 2% of the total training steps
    nb_steps_warmup=0.02 * nb_steps,
    train_interval=intervals,
    elite_frac=0.2,
)
cem.compile()

# run training (rendering is always off here)
# raise `verbose` for more detailed logging
cem.fit(env, verbose=1, visualize=False, nb_steps=nb_steps)

# store the trained weights, replacing any file left by an earlier run
weights_path = 'cem_{}_params.h5f'.format(ENV_NAME)
cem.save_weights(weights_path, overwrite=True)

# evaluate the trained agent over five episodes
cem.test(env, nb_episodes=5, visualize=visual)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 1)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 200)               400       
_________________________________________________________________
activation_1 (Activation)    (None, 200)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 200)               40200     
_________________________________________________________________
activation_2 (Activation)    (None, 200)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 200)               40200     
_________________________________________________________________
activation_3 (Activation)    (None, 200)               0         
__________

<keras.callbacks.History at 0x11d352668>