In [6]:
import gym
import gym_digger
from os import system
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten
from tensorflow.keras.optimizers import Adam
import time
from IPython.display import clear_output

# weights file handed to dqn.load_weights() below
IN_FILE = 'saved_weights/dqn_Digger-v0_4x4_h_weights_a.h5f'

# environment settings
# All of these are forwarded to gym.make(): ENV_NAME as the environment id,
# the rest as the keyword arguments map_name, battery, completion_bonus and
# battery_penalty.
# NOTE(review): the meaning of the battery/bonus/penalty values is defined by
# the gym_digger package, not by this script -- confirm there before tuning.
ENV_NAME = 'Digger-v0'
MAP_NAME = '4x4_h'
BATTERY = 1000
COMPLETION_BONUS = 0
BATTERY_PENALTY = 0

# keras settings
SEQUENTIAL_MEMORY = 100_000   # `limit` of the SequentialMemory replay buffer
WINDOW_LENGTH = 1             # `window_length` of the SequentialMemory
STEPS_WARMUP = 10             # `nb_steps_warmup` of the DQNAgent
TARGET_MODEL_UPDATE = 0.01    # `target_model_update` of the DQNAgent
LEARNING_RATE = 0.001         # learning rate given to the Adam optimizer
STEPS = 400_000               # not referenced by the code visible in this cell

# build the environment
# The map name and the battery/bonus/penalty settings are collected here and
# passed to gym.make() as keyword arguments alongside the environment id.
env_options = {
    'map_name': MAP_NAME,
    'battery': BATTERY,
    'completion_bonus': COMPLETION_BONUS,
    'battery_penalty': BATTERY_PENALTY,
}
env = gym.make(ENV_NAME, **env_options)

# number of actions the environment exposes; used as the width of the
# network's output layer and passed to the DQN agent
nb_actions = env.action_space.n

# build the model
# Q-network: flatten the observation window, run it through three 16-unit
# ReLU hidden layers, and produce one linear output per action.
#
# The leading input dimension is taken from WINDOW_LENGTH (the same value
# given to SequentialMemory) instead of a hard-coded 1, so the network input
# stays in sync with the agent's state window if that constant is changed.
model = Sequential([
    Flatten(input_shape=(WINDOW_LENGTH,) + env.observation_space.shape),
    Dense(16),
    Activation('relu'),
    Dense(16),
    Activation('relu'),
    Dense(16),
    Activation('relu'),
    Dense(nb_actions),
    Activation('linear'),
])

# configure and compile the agent
# The agent is rebuilt with the same memory/policy/optimizer configuration so
# the saved weights can be loaded into it.
memory = SequentialMemory(limit=SEQUENTIAL_MEMORY, window_length=WINDOW_LENGTH)
policy = BoltzmannQPolicy()
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    nb_steps_warmup=STEPS_WARMUP,
    target_model_update=TARGET_MODEL_UPDATE,
    policy=policy,
)
# `learning_rate` is the supported keyword for tf.keras optimizers; the older
# `lr` alias is deprecated and is rejected by newer Keras releases.
# NOTE(review): keras-rl appears to need compile() before load_weights()
# (the target network is created there) -- confirm before reordering.
dqn.compile(Adam(learning_rate=LEARNING_RATE), metrics=['mae'])

# load the weights
dqn.load_weights(IN_FILE)

# run the test
# Plays five episodes with the loaded agent. Each episode shows the initial
# board for two seconds, then repeatedly asks the agent for an action, steps
# the environment, renders it and prints the running statistics, pausing half
# a second per step before clearing the notebook output.
for episode in range(5):
    # fresh episode: reset the environment and show the starting position
    observation = env.reset()
    print(f'Start episode {episode}', end='')
    env.render()
    time.sleep(2)
    clear_output(wait=True)

    # per-episode bookkeeping
    done, step, total_reward = False, 0, 0

    while True:
        step += 1

        # let the agent choose, then advance the environment by one step
        action = dqn.forward(observation)
        observation, reward, done, info = env.step(action)

        # the environment reports battery and score through the info dict
        battery, score = info['battery'], info['score']
        total_reward += reward

        # render it
        env.render()

        # print stats
        stats = f'\nepisode: {episode}\nstep: {step}\naction: {action}\nreward: {reward}\ntotal_reward: {total_reward}\ndone: {done}\nbattery: {battery}\nscore: {score}'
        print(stats)

        # delay
        time.sleep(0.5)
        clear_output(wait=True)

        # the final step is still rendered and shown before leaving the loop
        if done:
            break


(Dig)
0000
[41m0[0m000
0000
0000

episode: 4
step: 30
action: 4
reward: 1
total_reward: 8
done: True
battery: 970
score: 19
