In [4]:
import os
import threading
import multiprocessing
import numpy as np
import tensorflow as tf

from worker import Worker
from ac_network import AC_Network
import warnings
warnings.filterwarnings('ignore')

In [5]:
# Gym environment id used by every worker.
ENV_NAME = 'LunarLander-v2'
# Sizes handed to AC_Network / Worker as the state and action dimensions
# (presumably the environment's observation and action space sizes; confirm against gym).
STATE_DIM, ACTION_DIM = 8, 4
# Output directory given to the gym monitor when running in test mode.
MONITOR_DIR = './results/' + ENV_NAME

In [6]:
# --- Reproducibility ---
# Seed applied to both numpy and TensorFlow, and forwarded to each Worker.
RANDOM_SEED = 1234

# --- Run mode / checkpointing ---
# Directory where checkpoints are looked up (and created if missing).
MODEL_DIR = './model/'
# Restore the latest checkpoint from MODEL_DIR instead of initialising fresh variables.
LOAD_MODEL = False
# Evaluate with a single worker and the gym monitor instead of training (implies restoring).
TEST_MODEL = False

# --- Optimisation ---
# Step size for the shared Adam optimiser.
LEARNING_RATE = 1e-4
# First argument to Worker.work (presumably the reward discount factor; confirm in worker.py).
GAMMA = 0.99

In [None]:
# Build the graph (global network plus one Worker per CPU), then either train
# with one thread per worker or evaluate a restored model with a single worker.
#
# Names created here (master_network, global_episodes, workers, saver, ...) are
# ordinary notebook-level variables; no `global` declaration is needed at cell scope.

tf.reset_default_graph()

if not os.path.exists(MODEL_DIR):
    os.makedirs(MODEL_DIR)

with tf.device("/cpu:0"):
    # Seed both numpy and the TF graph before any variables are created.
    np.random.seed(RANDOM_SEED)
    tf.set_random_seed(RANDOM_SEED)

    # Non-trainable counter shared with every worker.
    global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
    trainer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
    master_network = AC_Network(STATE_DIM, ACTION_DIM, 'global', None)  # Generate global network

    # For testing and visualisation we only need one worker; otherwise use one
    # worker per available CPU thread.
    num_workers = 1 if TEST_MODEL else multiprocessing.cpu_count()

    # Create worker classes
    workers = [
        Worker(i, STATE_DIM, ACTION_DIM, trainer, MODEL_DIR, global_episodes,
               ENV_NAME, RANDOM_SEED, TEST_MODEL)
        for i in range(num_workers)
    ]

    # Created after the networks so the saver sees every variable defined above;
    # at most the five most recent checkpoints are kept.
    saver = tf.train.Saver(max_to_keep=5)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    if LOAD_MODEL or TEST_MODEL:
        print('Loading Model...')
        ckpt = tf.train.get_checkpoint_state(MODEL_DIR)
        # get_checkpoint_state returns None when MODEL_DIR holds no checkpoint;
        # fail with a clear message instead of an AttributeError on `ckpt`.
        if ckpt is None or not ckpt.model_checkpoint_path:
            raise RuntimeError(
                'No checkpoint found in ' + MODEL_DIR +
                ' - train a model first or set LOAD_MODEL/TEST_MODEL to False.')
        saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        sess.run(tf.global_variables_initializer())

    if TEST_MODEL:
        env = workers[0].get_env()
        # NOTE(review): `env.monitor.start` is the legacy gym monitoring API; newer
        # gym releases use wrappers instead - confirm against the installed version.
        env.monitor.start(MONITOR_DIR, force=True)
        workers[0].work(GAMMA, sess, coord, saver)
    else:
        worker_threads = []
        for worker in workers:
            # Pass the bound method and its arguments directly. A `lambda` here would
            # capture the loop variable by reference, so a thread that starts late
            # could end up running a different worker than intended.
            t = threading.Thread(target=worker.work, args=(GAMMA, sess, coord, saver))
            t.start()
            worker_threads.append(t)
        # Block until every worker thread has finished.
        coord.join(worker_threads)



Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Please use `layer.__call__` method instead.




Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Starting worker 0
Starting worker 4Starting worker 6Starting worker 5Starting worker 7Starting worker 1
Starting worker 3
Starting worker 9

Starting worker 8

Starting worker 10

Starting worker 11

Starting worker 2
| Reward: -96.10713740501198  | Episode 0
| Reward: -159.34258021786735  | Episode 1
| Reward: -151.1287448997877  | Episode 2
| Reward: -99.97526072476431  | Episode 3
| Reward: -69.70533291618

| Reward: -663.6658149432657  | Episode 101
| Reward: -473.0927157938082  | Episode 102
| Reward: -722.5671389616427  | Episode 103
| Reward: -532.5099589635952  | Episode 104
| Reward: -319.3435799176397  | Episode 105
| Reward: -711.9289899402567  | Episode 106
| Reward: -518.9875957327008  | Episode 107
| Reward: -476.7866063755406  | Episode 108
| Reward: -779.0086291680233  | Episode 109
| Reward: -429.5388085186137  | Episode 110
| Reward: -815.1879002248353  | Episode 111
| Reward: -463.83606001457423  | Episode 112
| Reward: -776.7267043457507  | Episode 113
| Reward: -484.51631818044524  | Episode 114
| Reward: -511.8756274339147  | Episode 115
| Reward: -525.2181347864189  | Episode 116
| Reward: -495.8327361992739  | Episode 117
| Reward: -799.5721026599615  | Episode 118
| Reward: -562.9692163246934  | Episode 119
| Reward: -492.3371948033906  | Episode 120
| Reward: -910.7034180226616  | Episode 121
| Reward: -818.7538880065509  | Episode 122
| Reward: -738.4175633022783  

| Reward: -491.0975655398538  | Episode 287
| Reward: -486.3762932403011  | Episode 288
| Reward: -516.1653796672526  | Episode 289
| Reward: -454.36769075649045  | Episode 290
| Reward: -381.40865970600976  | Episode 291
| Reward: -348.4263118196319  | Episode 292
| Reward: -663.9204074075562  | Episode 293
| Reward: -993.1760381665325  | Episode 294
| Reward: -395.3367916932651  | Episode 295
| Reward: -371.8701407911708  | Episode 296
| Reward: -436.4100691675067  | Episode 297
| Reward: -369.7932628194744  | Episode 298
| Reward: -561.1779857909232  | Episode 299
| Reward: -418.23806677370163  | Episode 300
| Reward: -350.0554865114369  | Episode 301
| Reward: -742.6480671179107  | Episode 302
| Reward: -197.73179979763586  | Episode 303
| Reward: -572.9531161533151  | Episode 304
| Reward: -518.0387255505611  | Episode 305
| Reward: -371.4623792611275  | Episode 306
| Reward: -519.8736859162448  | Episode 307
| Reward: -297.40950691682104  | Episode 308
| Reward: -476.320570751542