In [None]:
import tensorflow as tf
import gym

from rltensor.agents import DQN
from rltensor.processors import AtariProcessor
from rltensor.executions import Runner


env = gym.make('Breakout-v0')

conf = dict(
    action_spec={"type": "int", "shape": env.action_space.n},
    critic_spec=[
            {"name": "conv2d", "kernel_size":(8, 8), "num_filters":32, "stride":4,
             "padding": 'SAME', "is_batch":False, 'activation': tf.nn.relu},
            {"name": "conv2d", "kernel_size":(5, 5), "num_filters":64, "stride":2,
             "padding": 'SAME', "is_batch":True, 'activation': tf.nn.relu},
           {"name": "conv2d", "kernel_size": (3, 3), "num_filters":64, "stride":1,
             "padding": 'SAME', "is_batch":True, 'activation': tf.nn.relu},
            {"name": "dense", "is_flatten":True, "is_batch":True, "num_units": 512, 'activation': tf.nn.relu},
        ],
    optimizer_spec={"type": "rmsp"},
    lr_spec={"lr_init": 2.5e-4, "lr_decay_step": 100, "lr_decay": 0.9, "lr_min": 2.5e-4},
    state_spec=None,
    processor=AtariProcessor(84, 84),
    explore_spec={"t_ep_end": int(1e6), "ep_start": 1.0, "ep_end": 0.1},
    memory_limit=100000,
    window_length=4,
    is_prioritized=False,
    batch_size=32,
    error_clip=1.0,
    discount=0.99,
    t_target_q_update_freq=10000,
    double_q=True,
    t_learn_start=100,
    t_update_freq=4,
    min_r=None,
    max_r=None,
    sess=None,
)

env_name = 'Breakout-v0'
env = gym.make(env_name)
with tf.device('/cpu:0'):
    tf.reset_default_graph()
    dqn = DQN(**conf)
    runner = Runner(agent=dqn, env=env, env_name=env_name, tensorboard_dir="./logs")
    runner.fit(int(1e7), render_freq=None, save_video_path="./videos")

Building tensorflow graph...
Finished building tensorflow graph, spent time: 0.6238322257995605




  0%|          | 0/10000000 [00:00<?, ?it/s][A

Model saved in file: params/model.ckpt



  0%|          | 22/10000000 [00:00<12:55:12, 215.00it/s][A
  0%|          | 54/10000000 [00:00<10:24:24, 266.92it/s][A
  0%|          | 78/10000000 [00:00<10:48:48, 256.88it/s][A
  0%|          | 101/10000000 [00:00<12:26:22, 223.30it/s][A
  0%|          | 118/10000000 [00:01<41:38:31, 66.71it/s] [A
  0%|          | 130/10000000 [00:02<58:38:07, 47.37it/s][A
  0%|          | 139/10000000 [00:03<68:35:01, 40.50it/s][A
  0%|          | 146/10000000 [00:04<79:57:41, 34.74it/s][A
  0%|          | 151/10000000 [00:04<84:19:35, 32.94it/s][A
  0%|          | 283/10000000 [00:30<294:40:34,  9.43it/s][A
Exception in thread Thread-5:
Traceback (most recent call last):
  File "/home/tom/anaconda3/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/home/tom/anaconda3/lib/python3.6/site-packages/tqdm/_tqdm.py", line 144, in run
    for instance in self.tqdm_cls._instances:
  File "/home/tom/anaconda3/lib/python3.6/_weakrefset.py", line 60, in __iter__
    f

In [None]:
from collections import deque

x = list(deque(maxlen=3))

In [32]:
x.insert(0, 4)
x

[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]

In [3]:
import tensorflow as tf
import gym

from rltensor.agents import DQN
from rltensor.processors import AtariProcessor
from rltensor.networks import DuelingModel


conf = {"q_conf":[
            {"name": "conv2d", "kernel_size":(8, 8), "num_filter":32, "stride":4,
             "padding": 'SAME', "is_batch":False, 'activation': tf.nn.relu},
            {"name": "conv2d", "kernel_size":(5, 5), "num_filter":64, "stride":2,
             "padding": 'SAME', "is_batch":False, 'activation': tf.nn.relu},
           {"name": "conv2d", "kernel_size": (3, 3), "num_filter":64, "stride":1,
             "padding": 'SAME', "is_batch":False, 'activation': tf.nn.relu},
            {"name": "dense", "is_flatten":True, "is_batch":False, "num_hidden": 512, 'activation': tf.nn.relu},
        ],
        "env_name": 'Breakout-v0',
        "processor": AtariProcessor(84, 84),
}

env = gym.make('Breakout-v0')
tf.reset_default_graph()
dqn = DQN(env, conf, q_network_cls=DuelingModel)
dqn.play(num_episode=10, ep=0.05, load_file_path="./breakout_dqn_params/model.ckpt",
         save_video_path="./breakout_videos", render_freq=1)

[2017-07-30 21:57:02,794] Making new env: Breakout-v0
[2017-07-30 21:57:03,411] Finished writing results. You can upload them to the scoreboard via gym.upload('/home/tomoaki/work/Development/RL/breakout_videos')


INFO:tensorflow:Restoring parameters from ./breakout_dqn_params/model.ckpt


[2017-07-30 21:57:03,497] Restoring parameters from ./breakout_dqn_params/model.ckpt
[2017-07-30 21:57:03,531] Clearing 8 monitor files from previous run (because force=True was provided)
[2017-07-30 21:57:03,538] Starting new video recorder writing to /home/tomoaki/work/Development/RL/breakout_videos/openaigym.video.2.3241.video000000.mp4



Model restored.


[2017-07-30 21:57:35,712] Starting new video recorder writing to /home/tomoaki/work/Development/RL/breakout_videos/openaigym.video.2.3241.video000001.mp4

[2017-07-30 22:01:21,034] Starting new video recorder writing to /home/tomoaki/work/Development/RL/breakout_videos/openaigym.video.2.3241.video000008.mp4



In [None]:
if [1, 2, 3]:
    print("hello")

In [None]:
env.action_space.n

In [None]:
y.get_shape().as_list()

In [None]:
count = 4
while count < 5:
    print(count)
    count += 1

In [None]:
"%s" % True

In [None]:
a.insert(0, 2)

In [15]:
a

[2, 1]

In [11]:
np.random.randint(0, 2, 10)

array([0, 0, 0, 1, 0, 0, 0, 1, 0, 0])

In [42]:
from collections import deque

In [44]:
x = deque([1, 2, 3], maxlen=5)

In [45]:
x.append(3)
x.append(3)
x.append(3)

In [46]:
x

deque([2, 3, 3, 3, 3])

In [57]:
 result = tf.select(pred, val_if_true, val_if_false)

AttributeError: module 'tensorflow' has no attribute 'select'

In [60]:
x = tf.placeholder(tf.bool, (None,))
y = tf.cast(x, tf.int32)
z = tf.one_hot(y, 2)

In [62]:
sess = tf.InteractiveSession()
print(y.eval(feed_dict={x:[True, False, True]}))
print(z.eval(feed_dict={x:[True, False, True]}))

[1 0 1]
[[ 0.  1.]
 [ 1.  0.]
 [ 0.  1.]]


In [13]:
type(np.arange(10).astype(int)[0])

numpy.int64

In [16]:
type(np.random.choice(range(0, 5), 3)[0])

numpy.int64

In [44]:
np.random.choice([1, 2, 3, 4], 3, False)

array([1, 4, 3])

In [12]:
x = np.arange(10)
np.append(x, 10)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [13]:
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])