In [1]:
import numpy as np
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory
# import custom loonminator game environment
import gym_loon

# ---- config ----------------------------------------------------------------

# the environment of the game to train on; alternatives can be openai
# defined environments such as atari games
ENV_NAME = 'loon-v0'

# how many steps per epoch
intervals = 100

# total steps to train for; the number of epochs is nb_steps / intervals
nb_steps = 10000

# training examples per iteration
batch_size = 500

# number of nodes per hidden layer in the network
hidden_size = 200

# whether or not to visualize end testing - can set true for openai envs
visual = False

# automatic config
# build the environment registered under ENV_NAME, then fix the numpy and
# environment seeds (both to 1) so repeated runs start from the same state
env = gym.make(ENV_NAME)
np.random.seed(1)
env.seed(1)

# define actions to take
# nb_actions: size of the action space (read from its `.n` attribute)
# obs_dim: first dimension of the observation shape
# NOTE(review): obs_dim is not used anywhere in the visible cell - confirm
# whether a later cell relies on it before removing
nb_actions = env.action_space.n
obs_dim = env.observation_space.shape[0]

# create network
# Flatten takes a window of one observation (the leading 1 in input_shape),
# followed by a stack of identical relu hidden layers and a softmax output
# with one unit per action.
nb_hidden_layers = 6
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
for _ in range(nb_hidden_layers):
    model.add(Dense(hidden_size))
    model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table
model.summary()

# build the agent: episode/parameter memory plus the CEM agent around `model`
memory = EpisodeParameterMemory(limit=1000, window_length=1)
cem = CEMAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    batch_size=batch_size,
    # warm up for 2% of the total training steps
    nb_steps_warmup=0.02 * nb_steps,
    train_interval=intervals,
    elite_frac=0.2,
)
cem.compile()

# train the agent for nb_steps steps, logging once per `intervals` steps
# (raise verbose for more detailed output)
cem.fit(
    env,
    nb_steps=nb_steps,
    visualize=False,
    verbose=1,
    log_interval=intervals,
)

# persist the trained weights, replacing any file from a previous run
weights_path = 'cem_{}_params.h5f'.format(ENV_NAME)
cem.save_weights(weights_path, overwrite=True)

# evaluate on five games; rendering is controlled by `visual`
cem.test(env, visualize=visual, nb_episodes=5)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 1)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 200)               400       
_________________________________________________________________
activation_1 (Activation)    (None, 200)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 200)               40200     
_________________________________________________________________
activation_2 (Activation)    (None, 200)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 200)               40200     
_________________________________________________________________
activation_3 (Activation)    (None, 200)               0         
__________

1 episodes - episode_reward: 114960.565 [114960.565, 114960.565]

Interval 81 (8000 steps performed)
1 episodes - episode_reward: 114901.058 [114901.058, 114901.058]

Interval 82 (8100 steps performed)
1 episodes - episode_reward: 111689.077 [111689.077, 111689.077]

Interval 83 (8200 steps performed)
1 episodes - episode_reward: 114869.690 [114869.690, 114869.690]

Interval 84 (8300 steps performed)
1 episodes - episode_reward: 106567.294 [106567.294, 106567.294]

Interval 85 (8400 steps performed)
1 episodes - episode_reward: 115301.456 [115301.456, 115301.456]

Interval 86 (8500 steps performed)
1 episodes - episode_reward: 105520.447 [105520.447, 105520.447]

Interval 87 (8600 steps performed)
1 episodes - episode_reward: 99287.995 [99287.995, 99287.995]

Interval 88 (8700 steps performed)
1 episodes - episode_reward: 107779.765 [107779.765, 107779.765]

Interval 89 (8800 steps performed)
1 episodes - episode_reward: 102637.129 [102637.129, 102637.129]

Interval 90 (8900 steps perf

<keras.callbacks.History at 0x11f910630>