In [3]:
%matplotlib inline

from __future__ import print_function

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from MazeSim_OneFrame_New import MazeSimulator
from itertools import count
from DQNAgent_OneFrame_New import DeepQNetwork_OneFrame, experience_buffer

# Simulator configuration. The dict is handed to MazeSimulator when each test
# episode is created, and 'num_observation_lines' is also passed to the network
# constructor.
settings = {
    "objects": ["friend", "enemy"],
    "colors": dict(hero="yellow", friend="green", enemy="red"),
    "object_reward": dict(obstacle=-100, wall=-100),
    "hero_bounces_off_walls": False,
    "enemy_move": True,
    "world_size": (900, 500),
    "hero_initial_position": [450, 250],
    "hero_initial_speed": [0, 0],
    "end_position": [750, 450],
    "maximum_speed": [50, 50],
    "enemy_max_speed": [50, 50],
    "object_radius": 30.0,
    "hero_radius": 10,
    "num_objects": dict(friend=0, enemy=20),
    "num_observation_lines": 200,
    "observation_line_length": 200,
    "laser_buffer_length": 1,
    "obstacle_tolerance": 0.2,
    "delta_v": 50,
}

In [14]:
# Avoidance Network
# Build the network inside a dedicated graph, open a session bound to that
# graph, and load the first saved checkpoint into it.
n_observation_lines = settings['num_observation_lines']
initial_checkpoint = 'AvoidNet_OneFrame_New_0.cptk'

g2 = tf.Graph()
with g2.as_default():
    # NOTE(review): the second argument (5) is presumably the number of
    # discrete actions -- confirm against DeepQNetwork_OneFrame.
    AvdNet = DeepQNetwork_OneFrame(n_observation_lines, 5)
    # Instantiated while g2 is the default graph, i.e. after the network
    # has been constructed in it.
    saver_AvdNet = tf.train.Saver()

sess2 = tf.Session(graph=g2)
saver_AvdNet.restore(sess2, initial_checkpoint)


In [15]:
# Test Simulation Config
# Evaluate a series of saved checkpoints (indices 0, 4, ..., 36). For each one,
# run `test_episodes` fresh simulations, always taking the action returned by
# AvdNet.predict, and record how long each episode lasted.
#
# Results:
#   test_log    -- one list per checkpoint holding the per-episode step index
#   test_avglog -- mean of each of those lists (NaN if no episode finished)
ShowInterval = 1       # draw every N-th step when the draw call below is enabled
test_episodes = 100    # episodes per checkpoint
test_maxsteps = 5000   # hard cap on steps in a single episode
stopSession = True     # close sess2 when the run is interrupted from the keyboard
optimal_switch = 0.18  # not used in this cell; kept in case other cells read it

test_log = []
test_avglog = []
for i in range(0, 37, 4):
    # restore models
    filename = 'AvoidNet_OneFrame_New_' + str(i) + '.cptk'
    saver_AvdNet.restore(sess2, filename)
    print('Evaluation on Model ' + str(i) + ' starts now!')

    test_steplog = []
    test_success = 0
    total_steps = 0
    interrupted = False
    # Start Testing
    try:
        for episode in range(test_episodes):
            # reset environment and get first observation
            sim = MazeSimulator(settings)
            sA = sim.observe()
            rAll = 0

            for simstep in range(test_maxsteps):
                # generate action (a single forward pass; the Q-values were
                # previously fetched with a second run but never used)
                ac = sess2.run(AvdNet.predict, feed_dict={AvdNet.ob_input: [sA]})[0]

                sim.perform_action(ac)
                sim.step(0.1)

                # observe
                sA = sim.observe()
                # get reward
                # only hit reward, goal reward and time punish
                # NOTE(review): the call is kept even though only rAll consumes
                # the value -- it may also update simulator state; confirm.
                r = sim.collect_reward(0, sA)
                rAll += r

                # show
                if simstep % ShowInterval == 0 or sim.GameOver:
                    # sim.draw([total_steps])
                    pass

                total_steps += 1

                if sim.GameOver:
                    break
            # log
            # simstep is the zero-based index of the last step executed, i.e.
            # one less than the number of steps taken in this episode.
            test_steplog.append(simstep)
            if sim.GameSuccess:
                test_success += 1

            print('Ep:', episode, 'TS:', total_steps, 'ES:', simstep, 'Succ', test_success)
            # END of one episode
        # END of all episode
    except KeyboardInterrupt:
        interrupted = True
        if stopSession:
            sess2.close()
            print("Interrupted, session closed")
        else:
            print("Interrupted, session not closed")

    # Always record an entry so test_log stays aligned with the checkpoint
    # sequence; an empty log yields NaN instead of a numpy empty-mean warning.
    test_log.append(test_steplog)
    test_avglog.append(np.mean(test_steplog) if test_steplog else float('nan'))
    print('Test on Model ' + str(i) + ' finished. Average step is', test_avglog[-1])

    if interrupted and stopSession:
        # sess2 is closed now; restoring the next checkpoint into it would fail.
        break

Evaluation on Model0starts now!
Ep: 0 TS: 16 ES: 15 Succ 0
Ep: 1 TS: 33 ES: 16 Succ 0
Ep: 2 TS: 48 ES: 14 Succ 0
Ep: 3 TS: 69 ES: 20 Succ 0
Ep: 4 TS: 107 ES: 37 Succ 0
Ep: 5 TS: 150 ES: 42 Succ 0
Ep: 6 TS: 194 ES: 43 Succ 0
Ep: 7 TS: 222 ES: 27 Succ 0
Ep: 8 TS: 236 ES: 13 Succ 0
Ep: 9 TS: 263 ES: 26 Succ 0
Ep: 10 TS: 293 ES: 29 Succ 0
Ep: 11 TS: 306 ES: 12 Succ 0
Ep: 12 TS: 405 ES: 98 Succ 0
Ep: 13 TS: 506 ES: 100 Succ 0
Ep: 14 TS: 551 ES: 44 Succ 0
Ep: 15 TS: 567 ES: 15 Succ 0
Ep: 16 TS: 595 ES: 27 Succ 0
Ep: 17 TS: 630 ES: 34 Succ 0
Ep: 18 TS: 649 ES: 18 Succ 0
Ep: 19 TS: 667 ES: 17 Succ 0
Ep: 20 TS: 702 ES: 34 Succ 0
Ep: 21 TS: 734 ES: 31 Succ 0
Ep: 22 TS: 761 ES: 26 Succ 0
Ep: 23 TS: 808 ES: 46 Succ 0
Ep: 24 TS: 836 ES: 27 Succ 0
Ep: 25 TS: 896 ES: 59 Succ 0
Ep: 26 TS: 914 ES: 17 Succ 0
Ep: 27 TS: 925 ES: 10 Succ 0
Ep: 28 TS: 947 ES: 21 Succ 0
Ep: 29 TS: 986 ES: 38 Succ 0
Ep: 30 TS: 1022 ES: 35 Succ 0
Ep: 31 TS: 1049 ES: 26 Succ 0
Ep: 32 TS: 1087 ES: 37 Succ 0
Ep: 33 TS: 1102 ES: 1

In [18]:
import pprint, pickle

# Persist the per-checkpoint step logs collected by the evaluation loop.
# The context manager closes the file even if pickle.dump raises.
with open('EvalLog_OneFrame.pkl', 'wb') as output:
    pickle.dump(test_log, output)