# Main file to run experiments

If you do not know, where to put your code, put it here

In [11]:
from dlpf.base_utils import *

In [12]:
logger = init_log(out_file = 'testbed.log', stderr = False)

In [13]:
import gym
from dlpf.agents import DqnAgent, RandomAgent

## Test with Random agent

Pipeline works, but agent does not learn))

In [14]:
# env = gym.make('PathFindingByPixel-v1')
# env.configure(tasks_dir = os.path.abspath('data/sample/imported/'), scale = 10, map_shape = (10, 10))
# env.monitor.start('data/sample/results/random', force=True, seed=0)

# agent = RandomAgent(env.action_space)

# episode_count = 1
# max_steps = 10
# reward = 0
# done = False

# for i in range(episode_count):
#     ob = env.reset()

#     for j in range(max_steps):
#         action = agent.act(ob, reward, done)
#         ob, reward, done, _ = env.step(action)
#         if done:
#             break
#         # Note there's no env.render() here. But the environment still can open window and
#         # render if asked by env.monitor: it calls env.render('rgb_array') to record video.
#         # Video is not recorded every episode, see capped_cubic_video_schedule for details.

# # Dump result info to disk
# env.monitor.close()

## Basic DQN

https://www.cs.toronto.edu/~vmnih/docs/dqn.pdf

In [15]:
env = gym.make('PathFindingByPixel-v1')
env.configure(tasks_dir = os.path.abspath('data/sample/imported/'), monitor_scale = 10, map_shape = (10, 10))

In [16]:
env.monitor.start('data/sample/results/basic_dqn', force=True, seed=0)

In [17]:
agent = DqnAgent(state_size = env.observation_space.shape,
                 number_of_actions = env.action_space.n,
                 save_name = env.__class__.__name__)

Training a new model


In [18]:
episode_count = 5000
max_steps = 100

for _ in xrange(episode_count):
    observation = env.reset()
    agent.new_episode()
    for _ in range(max_steps):
        action, values = agent.act(observation)
        observation, reward, done, info = env.step(action)
        agent.observe(reward)
        if done:
            break

In [19]:
!rm PathFindingByPixelEnv.h5

## Optimize performance

In [1]:
from dlpf.io import TaskSet
import numpy, collections
import numba

Using Theano backend.
[2016-09-10 16:15:31,894] CUDA is installed, but device gpu is not available  (error: Unable to get the number of gpus available: CUDA driver version is insufficient for CUDA runtime version)


In [2]:
t = TaskSet('data/current/imported/paths/', 'data/current/imported/maps/')

In [3]:
tt = t[t.keys()[0]]

In [4]:
DIFFS = numpy.array([
        [-1, 0],
        [-1, 1],
        [0,  1],
        [1,  1],
        [1,  0],
        [1, -1],
        [0, -1],
        [-1, -1]
], dtype = 'int32')

In [5]:
BY_PIXEL_ACTION_DIFFS = {
    0 : numpy.array([-1,  0], dtype = 'int8'),
    1 : numpy.array([-1,  1], dtype = 'int8'),
    2 : numpy.array([ 0,  1], dtype = 'int8'),
    3 : numpy.array([ 1,  1], dtype = 'int8'),
    4 : numpy.array([ 1,  0], dtype = 'int8'),
    5 : numpy.array([ 1, -1], dtype = 'int8'),
    6 : numpy.array([ 0, -1], dtype = 'int8'),
    7 : numpy.array([-1, -1], dtype = 'int8')
}

In [6]:
%load_ext Cython

In [22]:
%%cython --annotate

import collections
cimport numpy as np

DIFFS = np.array([
        [-1, 0],
        [-1, 1],
        [0,  1],
        [1,  1],
        [1,  0],
        [1, -1],
        [0, -1],
        [-1, -1]
], dtype = 'int32')

def build_distance_map(local_map, finish):
    result = -np.array(local_map, dtype='int32')
    
    queue = collections.deque()
    queue.append((finish, 0))
    result[finish] = 0

    while queue:
        cur_point, cur_dist = queue.popleft()
        new_dist = cur_dist + 1

        for new_point in DIFFS + cur_point:
            new_point = tuple(new_point)

            if (0 <= new_point[0] < local_map.shape[0] and 0 <= new_point[1] < local_map.shape[1] # we are in boundaries
                and new_point != finish
                and result[new_point] == 0): # we are not going to obstacle and we have not filled this cell yet
                queue.append((new_point, new_dist))
                result[new_point] = new_dist

    return result

NameError: name 'np' is not defined

In [24]:
numpy.uint8

numpy.uint8

In [15]:
build_distance_map(tt.local_map, tt.finish)

array([[ 84,  84,  84, ..., 490, 491, 492],
       [ 83,  83,  83, ..., 490, 491, 492],
       [ 82,  82,  82, ..., 490, 491, 492],
       ..., 
       [ -1,  -1,  -1, ..., 520, 520, 520],
       [  0,  -1,  -1, ..., 521, 521, 521],
       [  0,  -1,  -1, ..., 522, 522, 522]], dtype=int32)

In [9]:
%timeit build_distance_map(tt.local_map, tt.finish)

1 loop, best of 3: 12.7 s per loop


In [12]:
build_distance_map(TEST_MAP, (1, 1))

array([[-1, -1, -1,  2,  3,  4],
       [-1,  0,  1,  2,  3,  4],
       [-1,  1, -1,  2,  3,  4],
       [-1,  2, -1,  3,  3,  4],
       [-1,  3, -1,  4,  4,  4],
       [ 4,  4,  4,  5,  5,  5]], dtype=int32)

In [33]:
(1, 2) + numpy.array([[0, 0], [0, 0]])

array([[1, 2],
       [1, 2]])

In [16]:
%timeit build_distance_map(tt.local_map, tt.finish)

1 loop, best of 3: 15.1 s per loop


In [39]:
%timeit build_distance_map_np(tt.local_map, tt.finish)

KeyboardInterrupt: 

In [11]:
TEST_MAP = numpy.array([
    [ 1,  1,  1, 0, 0, 0],
    [ 1,  0,  0, 0, 0, 0],
    [ 1,  0,  1, 0, 0, 0],
    [ 1,  0,  1, 0, 0, 0],
    [ 1,  0,  1, 0, 0, 0],
    [ 0,  0,  0, 0, 0, 0]
], dtype = 'int32')

In [27]:
getitem(TEST_MAP, *numpy.array([1, 2]))

NameError: name 'getitem' is not defined