In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from __future__ import print_function

import numpy as np
import tempfile
import tensorflow as tf

from tf_rl.controller import DiscreteDeepQ, ModelController
from tf_rl.simulation import KarpathyGame
from tf_rl import simulate
from tf_rl.models import MLP
from collections import OrderedDict
from euclid import Vector2


In [3]:
LOG_DIR = tempfile.mkdtemp()
print(LOG_DIR)

/var/folders/bl/qrhjk6_159n3pcdvx70pgjqc0000gn/T/tmpl8yrfE


In [4]:
current_settings = {
    #earlier objects are eaten by later objects (pred eat prey)
    'objects': [
        'prey',
        'pred',
    ],
    'colors': {
        'prey': [212, 211, 208],
        'pred':  [84, 37, 0],
    },
    'object_reward': {
        'prey': {'prey': 0.1, 'pred': -0.1},
        'pred': {'prey': 1.0, 'pred': -1.0}
    },
    'hero_bounces_off_walls': False,
    'world_size': (700,500),   
    "maximum_velocity":      {'prey': 50, 'pred': 50},
    "object_radius": 10.0,
    "num_objects": OrderedDict([('prey', 25), ('pred', 25)]),
    "num_objects_active": OrderedDict([('prey', 2), ('pred', 0)]),
    "num_observation_lines" : 32,
    "observation_line_length": 120.,
    "tolerable_distance_to_wall": 50,
    "wall_distance_penalty":  -0.0,
    "delta_v": 50
}

RUN = 'new'  #'new' to create new sim with values above
             #'load' to load a previously trained graph
num_steps = 759000

save = False

In [5]:
# create the game simulator
g = KarpathyGame(current_settings)
print(g.get_list(0))

('num_actions ', 5)
[0, 1]


In [6]:

journalist = tf.summary.FileWriter(LOG_DIR)

tf.reset_default_graph()

if RUN == 'new':

    if current_settings['num_objects_active']['prey'] != 0:

        with tf.variable_scope('prey'):
            brain_prey = MLP([g.observation_size,], [300, 200, g.num_actions], 
                         [tf.tanh, tf.tanh, tf.identity])

            # The optimizer to use. Here we use RMSProp as recommended
            # by the publication
            optimizer = tf.train.RMSPropOptimizer(learning_rate= 0.001, decay=0.9)

            controller_prey = DiscreteDeepQ((g.observation_size,), g.num_actions,
                                            brain_prey, optimizer, discount_rate=0.99, 
                                            exploration_period=5000, max_experience=10000, 
                                            store_every_nth=2, train_every_nth=4,
                                            summary_writer=journalist)




    if current_settings['num_objects_active']['pred'] != 0:
        with tf.variable_scope('pred'):
            brain_pred = MLP([g.observation_size,], [200, 200, g.num_actions], 
                         [tf.nn.relu, tf.nn.relu, tf.identity])

            # The optimizer to use. Here we use RMSProp as recommended
            # by the publication
            optimizer = tf.train.RMSPropOptimizer(learning_rate= 0.001, decay=0.9)

            # DiscreteDeepQ object
            controller_pred = ModelController((g.observation_size,), g.num_actions, 
                                       discount_rate=0.99, exploration_period=1000, max_experience=10000, 
                                       store_every_nth=4, train_every_nth=4,
                                       summary_writer=journalist)


elif RUN == 'load':

    if current_settings['num_objects']['prey'] != 0:
        prey_controller_folder = 'saved_graphs'
        if prey_controller_folder != 'none':
            #with tf.variable_scope('prey'):
                brain_prey = MLP([g.observation_size,], [200, 400, g.num_actions], 
                             [tf.tanh, tf.tanh, tf.identity])

                # The optimizer to use. Here we use RMSProp as recommended
                # by the publication
                optimizer = tf.train.RMSPropOptimizer(learning_rate= 0.001, decay=0.9)

                controller_prey = DiscreteDeepQ((g.observation_size,), g.num_actions, brain_prey, optimizer,
                                           discount_rate=0.99, exploration_period=5000, max_experience=10000, 
                                           store_every_nth=2, train_every_nth=4,
                                           summary_writer=journalist)
                controller_prey.restore(prey_controller_folder, False, num_steps)
    
    if current_settings['num_objects']['pred'] != 0:
        #with tf.variable_scope('pred'):

            # DiscreteDeepQ object
            controller_pred = ModelController((g.observation_size,), g.num_actions, 
                                       discount_rate=0.99, exploration_period=1000, max_experience=10000, 
                                       store_every_nth=4, train_every_nth=4,
                                       summary_writer=journalist)           

else:
    raise NameError('set RUN variable to \'new\' or \'load\'')
    
if current_settings['num_objects_active']['pred'] == 0:
    controller = [controller_prey]
elif current_settings['num_objects_active']['prey'] == 0:
    controller = [controller_pred]
else:
    controller = [controller_prey, controller_pred]

print('controller', controller, ' pred num ', current_settings['num_objects']['pred'],
     'prey num ', current_settings['num_objects']['prey'])


INFO:tensorflow:Summary name prey/MLP/input_layer/W_0:0 is illegal; using prey/MLP/input_layer/W_0_0 instead.
INFO:tensorflow:Summary name prey/MLP/input_layer/W_0:0/gradients is illegal; using prey/MLP/input_layer/W_0_0/gradients instead.
INFO:tensorflow:Summary name prey/MLP/input_layer/b:0 is illegal; using prey/MLP/input_layer/b_0 instead.
INFO:tensorflow:Summary name prey/MLP/input_layer/b:0/gradients is illegal; using prey/MLP/input_layer/b_0/gradients instead.
INFO:tensorflow:Summary name prey/MLP/hidden_layer_0/W_0:0 is illegal; using prey/MLP/hidden_layer_0/W_0_0 instead.
INFO:tensorflow:Summary name prey/MLP/hidden_layer_0/W_0:0/gradients is illegal; using prey/MLP/hidden_layer_0/W_0_0/gradients instead.
INFO:tensorflow:Summary name prey/MLP/hidden_layer_0/b:0 is illegal; using prey/MLP/hidden_layer_0/b_0 instead.
INFO:tensorflow:Summary name prey/MLP/hidden_layer_0/b:0/gradients is illegal; using prey/MLP/hidden_layer_0/b_0/gradients instead.
INFO:tensorflow:Summary name pre

In [None]:
FPS          = 30
ACTION_EVERY = 3
    
fast_mode = False
if fast_mode:
    WAIT, VISUALIZE_EVERY = False, 100
else:
    WAIT, VISUALIZE_EVERY = True, 1

    
try:
    simulate(simulation=g,
             controller=controller,
             fps=FPS,
             visualize_every=VISUALIZE_EVERY,
             action_every=ACTION_EVERY,
             wait=WAIT,
             disable_training=False,
             simulation_resolution=.001,
             save_path=None)
except KeyboardInterrupt:
    print("Interrupted")
    g.shut_down_graphics()
    print('graphics shut down')
    for controller_instance in controller:
        controller_instance.kill_session()
    print('controller sessions shut down')
        

34
done in 0.609297037125 s


In [12]:
session.run(current_controller.target_network_update)

NameError: name 'session' is not defined

In [9]:
current_controller.q_network.input_layer.Ws[0].eval()

NameError: name 'current_controller' is not defined

In [None]:
current_controller.target_q_network.input_layer.Ws[0].eval()

# Average Reward over time

In [None]:
g.plot_reward(smoothing=100)

In [None]:
session.run(current_controller.target_network_update)

In [None]:
current_controller.q_network.input_layer.Ws[0].eval()

In [None]:
current_controller.target_q_network.input_layer.Ws[0].eval()

# Visualizing what the agent is seeing

Starting with the ray pointing all the way right, we have one row per ray in clockwise order.
The numbers for each ray are the following:
- first three numbers are normalized distances to the closest visible (intersecting with the ray) object. If no object is visible then all of them are $1$. If there's many objects in sight, then only the closest one is visible. The numbers represent distance to friend, enemy and wall in order.
- the last two numbers represent the speed of moving object (x and y components). Speed of wall is ... zero.

Finally the last two numbers in the representation correspond to speed of the hero.

In [None]:
g.__class__ = KarpathyGame
np.set_printoptions(formatter={'float': (lambda x: '%.2f' % (x,))})
x = g.observe()
new_shape = (x[:-4].shape[0]//g.eye_observation_size, g.eye_observation_size)
print(x[:-4].reshape(new_shape))
print(x[-4:])
g.to_html()

In [13]:
x = {'prey':{'prey':0, 'pred':0},'pred':{'prey':0, 'pred':0}}
x['pred']['prey'] = 1
x

{'pred': {'pred': 0, 'prey': 1}, 'prey': {'pred': 0, 'prey': 0}}

In [13]:
import collections
d = collections.OrderedDict()
d['apple'] = 4
x = collections.OrderedDict([('apple', 4), ('banana', 3), ('orange', 2), ('pear', 1)])
for key, a in x.items():
    print(key + ' ' + str(a))

apple 4
banana 3
orange 2
pear 1


In [10]:
from tf_rl.controller import ModelController

In [12]:
import random
import numpy as np

In [14]:
speed    = np.random.uniform([-5,-5], [5,5]).astype(float)
print('1' , speed[0])
print('2', speed[1])


1 1.55353514184
2 -1.95956103972


In [11]:
(np.arctan2(1,0) + 11 * np.pi / 12.0) * 180 / np.pi

-74.999999999999986

In [12]:
(np.arctan2(1,0) + 11 * np.pi / 12.0) * 180 / np.pi

255.0

In [13]:
np.linspace(
                (-11 * np.pi / 12.0 + np.pi/2.0)* 180 / np.pi , 
                (11 * np.pi / 12.0+ np.pi/2.0)* 180 / np.pi ,
                11,
                endpoint=False)

array([ -75.,  -45.,  -15.,   15.,   45.,   75.,  105.,  135.,  165.,
        195.,  225.])

In [10]:
x = np.int_(1)
x = x.astype(float)
x.dtype

dtype('float64')

In [8]:
from euclid import Circle, Point2, Vector2, LineSegment2
import numpy as np
x = Point2(391.11, 339.06)
z = np.int_(3)
y = Point2(460.84, 142.64)
y0 = Point2(3, z)
x.distance(y)

208.43005853283253

In [8]:


w1 = tf.Variable(tf.truncated_normal(shape=[10]), name='w1')
w2 = tf.Variable(tf.truncated_normal(shape=[20]), name='w2')
tf.add_to_collection('vars', w1)
tf.add_to_collection('vars', w2)
saver = tf.train.Saver()
sess = tf.Session()
sess.run(tf.global_variables_initializer())
saver.save(sess, 'my-model')
# `save` method will call `export_meta_graph` implicitly.
# you will get saved graph files:my-model.meta

sess = tf.Session()
new_saver = tf.train.import_meta_graph('my-model.meta')
new_saver.restore(sess, tf.train.latest_checkpoint('./'))
all_vars = tf.get_collection('vars')
for v in all_vars:
    v_ = sess.run(v)
    print(v_)

[ 0.45261216 -0.81603801 -0.21161796  1.71767867  0.53031474 -0.33406609
  0.07414658  0.71746457 -0.79497665 -0.81216109]
[ 0.05205382  0.1825629   0.61859673  0.13708994  0.92213231 -0.4189474
 -1.11779344  0.20065963  1.00620401  0.98354363  1.31322467 -0.68679833
 -0.73815197 -1.21130407  0.53206164 -0.35457915 -0.70935756 -0.15833597
 -0.17378537  1.6402905 ]
[ 0.45261216 -0.81603801 -0.21161796  1.71767867  0.53031474 -0.33406609
  0.07414658  0.71746457 -0.79497665 -0.81216109]
[ 0.05205382  0.1825629   0.61859673  0.13708994  0.92213231 -0.4189474
 -1.11779344  0.20065963  1.00620401  0.98354363  1.31322467 -0.68679833
 -0.73815197 -1.21130407  0.53206164 -0.35457915 -0.70935756 -0.15833597
 -0.17378537  1.6402905 ]


In [15]:
import numpy as np
test = [None] * 2
test[0] = np.ones(100)

In [16]:
print(test
     )

[array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.]), None]


In [19]:
print(test[0] is not None)
print(test[0])


True
[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]


In [38]:
result = all(w!=None for w in test[1])
print(result)


False


In [37]:
result


True