In [1]:
import docker 
import pandas as pd
import tensorflow as tf
from gym_torcs_docker import TorcsDockerEnv, obs_to_state
from ddpg import DDPG
from a3c import A3C
from networks import A3CNetwork

# Docker client configured from the environment; it is handed to every test helper below.
docker_client = docker.from_env()

In [2]:
def testModelOnTrack(
        docker_client, sess, model, trackname, max_steps=1000,
        docker_port=3101):
    """Drives the model around the specified track for 1000 time steps"""

    env = TorcsDockerEnv(
        docker_client, 'test', port=docker_port, track_name=trackname)
    env._set_track()
    observation = env.reset(relaunch=True)
    state_t = obs_to_state(observation)

    results = {}

    for i in range(max_steps):
        action_t = model.predict(sess, state_t.reshape(1, state_t.shape[0]))
        observation, reward_t, done, _ = env.step(action_t[0])
        state_t = obs_to_state(observation)
        results[i] = reward_t
        if done:
            break

    env.end()

    return results


def testDDPG(docker_client, modeldir, test_tracks):
    """Loads the weights from the model dir and drives the agent around the provided test tracks"""
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    
    tf.reset_default_graph()

    model = DDPG(docker_client)

    saver = tf.train.Saver(max_to_keep=5)
    rewards = {}
    with tf.Session(config=config) as sess:
        ckpt = tf.train.get_checkpoint_state(modeldir)
        saver.restore(sess, ckpt.model_checkpoint_path)
        
        for track in test_tracks:
            reward = testModelOnTrack(
                docker_client, sess, model.actor, track, max_steps=1000,
                docker_port=3121)
            rewards[track] = reward
    return rewards


def testA3C(docker_client, modeldir, test_tracks):
    """Loads the weights from the model dir and drives the agent around the provided test tracks"""
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    
    # the device needs to be set otherwise the cpkt restore does not work
    tf.reset_default_graph()
    with tf.device('/cpu:0'):
        test_net =  A3CNetwork(29, 2, None, 'global')
    saver = tf.train.Saver(max_to_keep=5)
    rewards = {}
    with tf.Session(config=config) as sess:
        ckpt = tf.train.get_checkpoint_state(modeldir)
        saver.restore(sess, modeldir)

        for track in test_tracks:
            reward = testModelOnTrack(
                docker_client, sess, test_net, track, max_steps=1000,
                docker_port=3121)
            rewards[track] = reward
    return rewards

In [3]:
# Tracks every trained agent is driven around.
test_tracks = ['g-track-3', 'e-track-6', 'alpine-2']

# DDPG models are given as directories; testDDPG looks up the checkpoint inside each one.
path_ddpg_ref = '../models/ddpg_ref'
path_ddpg_1 = '../models/ddpg_1'
path_ddpg_2 = '../models/ddpg_2'
# The A3C model is given as a concrete checkpoint file rather than a directory.
path_a3c = '../models/a3c/model-3162.cptk'

In [4]:
# Evaluate the three DDPG variants one after another. Each call resets the
# default graph, restores that model's checkpoint and returns
# {track: {step: reward}}.
ddpg_ref = testDDPG(docker_client, path_ddpg_ref, test_tracks)
ddpg_1 = testDDPG(docker_client, path_ddpg_1, test_tracks)
ddpg_2 = testDDPG(docker_client, path_ddpg_2, test_tracks)

INFO:tensorflow:Restoring parameters from ../models/ddpg_ref/model-1050.cptk


[2017-04-25 03:08:47,917] Restoring parameters from ../models/ddpg_ref/model-1050.cptk


Waiting for server on 3121............
Waiting for server on 3121............
Client connected on 3121..............
Waiting for server on 3121............
Waiting for server on 3121............
Waiting for server on 3121............
Client connected on 3121..............
Waiting for server on 3121............
Waiting for server on 3121............
Waiting for server on 3121............
Client connected on 3121..............
INFO:tensorflow:Restoring parameters from ../models/ddpg_1/model-1550.cptk


[2017-04-25 03:09:24,567] Restoring parameters from ../models/ddpg_1/model-1550.cptk


Waiting for server on 3121............
Waiting for server on 3121............
Client connected on 3121..............
Waiting for server on 3121............
Waiting for server on 3121............
Waiting for server on 3121............
Client connected on 3121..............
Waiting for server on 3121............
Waiting for server on 3121............
Client connected on 3121..............
INFO:tensorflow:Restoring parameters from ../models/ddpg_2/model-250.cptk


[2017-04-25 03:09:45,661] Restoring parameters from ../models/ddpg_2/model-250.cptk


Waiting for server on 3121............
Waiting for server on 3121............
Client connected on 3121..............
Waiting for server on 3121............
Waiting for server on 3121............
Waiting for server on 3121............
Client connected on 3121..............
Waiting for server on 3121............
Waiting for server on 3121............
Client connected on 3121..............


In [5]:
# Evaluate the A3C agent on the same tracks; the result is {track: {step: reward}}.
a3c = testA3C(docker_client, path_a3c, test_tracks)

INFO:tensorflow:Restoring parameters from ../models/a3c/model-3162.cptk


[2017-04-25 03:10:05,318] Restoring parameters from ../models/a3c/model-3162.cptk


Waiting for server on 3121............
Waiting for server on 3121............
Client connected on 3121..............
Waiting for server on 3121............
Waiting for server on 3121............
Waiting for server on 3121............
Client connected on 3121..............
Waiting for server on 3121............
Waiting for server on 3121............
Client connected on 3121..............


In [24]:
# Total reward per track for the reference DDPG scenario
pd.DataFrame(ddpg_ref).sum().to_frame("ddpg_ref")

Unnamed: 0,ddpg_ref
alpine-2,-16.621978
e-track-6,-18.431343
g-track-3,-11.332032


In [25]:
# Total reward per track for the ddpg_1 scenario
pd.DataFrame(ddpg_1).sum().to_frame("ddpg_1")

Unnamed: 0,ddpg_1
alpine-2,215.928331
e-track-6,296.9248
g-track-3,280.188617


In [23]:
# Total reward per track for the ddpg_2 scenario
# (the earlier comment said "ddpg_1", copied from the previous cell)
pd.DataFrame(ddpg_2).sum().to_frame("ddpg_2")

Unnamed: 0,ddpg_2
alpine-2,218.888216
e-track-6,305.312837
g-track-3,280.339358


In [20]:
# Total reward per track for the a3c scenario
pd.DataFrame(a3c).sum().to_frame("a3c")

Unnamed: 0,a3c
alpine-2,-5.983607
e-track-6,-27.003535
g-track-3,-58.951636
