In [1]:
import os
import threading
import multiprocessing
import numpy as np
import tensorflow as tf

from worker import Worker
from ac_network import AC_Network

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [2]:
# --- Environment configuration ------------------------------------------
# Gym environment id; handed to every Worker when the graph is built.
ENV_NAME = 'LunarLander-v2'

# Sizes passed to AC_Network / Worker (presumably the observation and action
# dimensions of the environment -- verify against the env spec).
STATE_DIM, ACTION_DIM = 8, 4

# Directory handed to the environment monitor when running in test mode.
MONITOR_DIR = './results/{}'.format(ENV_NAME)

In [3]:
# --- Run configuration ---------------------------------------------------
# Seed applied to NumPy and TensorFlow, and forwarded to each Worker.
RANDOM_SEED = 1234

# Checkpoint handling: where checkpoints live, whether to restore one before
# running, and whether to run a single worker in test mode instead of training.
MODEL_DIR = './model/'
LOAD_MODEL, TEST_MODEL = True, False

# Optimiser step size (Adam) and the GAMMA value forwarded to Worker.work
# (presumably the reward discount factor -- confirm in worker.py).
LEARNING_RATE = 1e-4
GAMMA = 0.99

In [None]:
# Build the graph -- a shared 'global' AC_Network plus one Worker per CPU core
# (a single worker when TEST_MODEL is set) -- then either evaluate worker 0 or
# run every worker in its own thread and wait for all of them to finish.

tf.reset_default_graph()

if not os.path.exists(MODEL_DIR):
    os.makedirs(MODEL_DIR)

with tf.device("/cpu:0"):
    # Seed both NumPy and TensorFlow's graph-level RNG before creating any ops.
    np.random.seed(RANDOM_SEED)
    tf.set_random_seed(RANDOM_SEED)

    # Non-trainable counter shared with every worker.
    global_episodes = tf.Variable(0, dtype=tf.int32, name='global_episodes', trainable=False)
    trainer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
    # Instantiated for its graph side effects: creates the 'global' network.
    master_network = AC_Network(STATE_DIM, ACTION_DIM, 'global', None)

    # Test mode runs exactly one worker; training uses one per CPU core.
    num_workers = 1 if TEST_MODEL else multiprocessing.cpu_count()

    workers = [
        Worker(worker_id, STATE_DIM, ACTION_DIM, trainer, MODEL_DIR, global_episodes,
               ENV_NAME, RANDOM_SEED, TEST_MODEL)
        for worker_id in range(num_workers)
    ]
    saver = tf.train.Saver(max_to_keep=5)

with tf.Session() as sess:
    coord = tf.train.Coordinator()

    # get_checkpoint_state returns None when MODEL_DIR holds no checkpoint
    # (e.g. on the very first run, right after the directory was created),
    # so it must be checked before dereferencing model_checkpoint_path.
    ckpt = None
    if LOAD_MODEL or TEST_MODEL:
        print('Loading Model...')
        ckpt = tf.train.get_checkpoint_state(MODEL_DIR)

    if ckpt is not None and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
    elif TEST_MODEL:
        # Evaluating an untrained network is never what the user wants.
        raise IOError('TEST_MODEL is set but no checkpoint was found in ' + MODEL_DIR)
    else:
        if LOAD_MODEL:
            print('No checkpoint found in ' + MODEL_DIR + '; initializing variables from scratch.')
        sess.run(tf.global_variables_initializer())

    if TEST_MODEL:
        env = workers[0].get_env()
        # NOTE(review): env.monitor.start looks like the legacy Gym monitor
        # API -- confirm the installed gym version still provides it.
        env.monitor.start(MONITOR_DIR, force=True)
        workers[0].work(GAMMA, sess, coord, saver)
    else:
        worker_threads = []
        for worker in workers:
            # Pass the bound method and its arguments directly. A
            # `lambda: worker.work(...)` would look up `worker` only when the
            # thread actually runs, so a thread could end up driving a later
            # worker if the loop advanced first.
            thread = threading.Thread(target=worker.work, args=(GAMMA, sess, coord, saver))
            thread.start()
            worker_threads.append(thread)
        # Block until every worker thread has finished.
        coord.join(worker_threads)



Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Please use `layer.__call__` method instead.




Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Loading Model...
INFO:tensorflow:Restoring parameters from ./model/model-0.cptk
Starting worker 0
Starting worker 1Starting worker 3
Starting worker 4
Starting worker 5
Starting worker 8
Starting worker 10

Starting worker 7
Starting worker 11
Starting worker 2
Starting worker 6
Starting worker 9
| Reward: -95.57730562970124  | Episode 0
| Reward: 56.966037677509775  | Episode 1
| Reward: -220.18261897171968 

| Reward: -157.5401528843836  | Episode 101
| Reward: -307.3166667898922  | Episode 102
| Reward: -497.521373227829  | Episode 103
| Reward: -313.3184559543699  | Episode 104
| Reward: -244.12016669067407  | Episode 105
| Reward: -505.79193836206775  | Episode 106
| Reward: -562.5872289315391  | Episode 107
| Reward: -403.41901902994476  | Episode 108
| Reward: -417.5078489211102  | Episode 109
| Reward: -742.2423971594  | Episode 110
| Reward: -506.3022446243127  | Episode 111
| Reward: -766.3260017345631  | Episode 112
| Reward: -424.7695004206292  | Episode 113
| Reward: -480.6775139470428  | Episode 114
| Reward: -681.7389035081382  | Episode 115
| Reward: -488.7323805529567  | Episode 116
| Reward: -422.3243489533991  | Episode 117
| Reward: -362.77322973152013  | Episode 118
| Reward: -372.46608591559794  | Episode 119
| Reward: -374.79724457936953  | Episode 120
| Reward: -490.27017073320735  | Episode 121
| Reward: -368.12170922177245  | Episode 122
| Reward: -288.6574107794689

| Reward: -121.17869646969783  | Episode 286
| Reward: -115.02873164630824  | Episode 287
| Reward: -259.7109665847762  | Episode 288
| Reward: -148.66973461274443  | Episode 289
| Reward: -426.19692440662675  | Episode 290
| Reward: -369.96488455742866  | Episode 291
| Reward: -232.08053999736234  | Episode 292
| Reward: -578.5144275586229  | Episode 293
| Reward: -435.17660317304785  | Episode 294
| Reward: -240.4543362506779  | Episode 295
| Reward: -339.3660455238714  | Episode 296
| Reward: -149.6220724738991  | Episode 297
| Reward: -325.5820339149357  | Episode 298
| Reward: -340.8175220760778  | Episode 299
| Reward: -545.3920039997583  | Episode 300
| Reward: -225.43050742292212  | Episode 301
| Reward: -363.6777937168828  | Episode 302
| Reward: -131.04280419969425  | Episode 303
| Reward: -425.52777038558406  | Episode 304
| Reward: -407.7416765933781  | Episode 305
| Reward: -524.3923314346201  | Episode 306
| Reward: -435.2501169473733  | Episode 307
| Reward: -474.7249099

| Reward: -321.1936817228881  | Episode 471
| Reward: -401.37698703188437  | Episode 472
| Reward: -441.89493252394846  | Episode 473
| Reward: -369.97169267660485  | Episode 474
| Reward: -433.9030612645802  | Episode 475
| Reward: -291.6232884937781  | Episode 476
| Reward: -396.21724665371175  | Episode 477
| Reward: -236.81160341298747  | Episode 478
| Reward: -291.3150213242859  | Episode 479
| Reward: -249.3955610679711  | Episode 480
| Reward: -490.8230863059363  | Episode 481
| Reward: -464.0904337556225  | Episode 482
| Reward: -529.9664501126382  | Episode 483
| Reward: -368.21370589005267  | Episode 484
| Reward: -197.1434442493844  | Episode 485
| Reward: -186.28868207795438  | Episode 486
| Reward: -406.7714142213102  | Episode 487
| Reward: -107.86756755069867  | Episode 488
| Reward: -426.4442904702032  | Episode 489
| Reward: -166.81816588577004  | Episode 490
| Reward: -428.2552917532916  | Episode 491
| Reward: -331.61374901264037  | Episode 492
| Reward: -559.4954207

| Reward: -535.9459952580133  | Episode 649
| Reward: -332.96775780097016  | Episode 650
| Reward: -397.8763017634095  | Episode 651
| Reward: -430.3005559996153  | Episode 652
| Reward: -213.0935131889056  | Episode 653
| Reward: -115.83801838028334  | Episode 654
| Reward: -265.3281778004449  | Episode 655
| Reward: -455.4357264330369  | Episode 656
| Reward: -569.2177022858341  | Episode 657
| Reward: -351.7067777275478  | Episode 658
| Reward: -487.6074170944704  | Episode 659
| Reward: -441.3525289189223  | Episode 660
| Reward: -185.2265077236435  | Episode 661
| Reward: -621.9313122148427  | Episode 662
| Reward: -253.72939520127164  | Episode 663
| Reward: -532.8125210461614  | Episode 664
| Reward: -414.309067112621  | Episode 665
| Reward: -607.9104323795555  | Episode 666
| Reward: -606.0005745347787  | Episode 667
| Reward: -525.669373510988  | Episode 668
| Reward: -461.1247140562891  | Episode 669
| Reward: -573.2330654950108  | Episode 670
| Reward: -629.7764200934929  |

| Reward: -479.5450519022627  | Episode 836
| Reward: -399.89000944335015  | Episode 837
| Reward: -379.0516938937082  | Episode 838
| Reward: -464.0263424005126  | Episode 839
| Reward: -261.447166557596  | Episode 840
| Reward: -229.0855914283572  | Episode 841
| Reward: -273.90941248285975  | Episode 842
| Reward: -315.9851742080407  | Episode 843
| Reward: -442.11046164718874  | Episode 844
| Reward: -562.9062329382047  | Episode 845
| Reward: -568.1325108259318  | Episode 846
| Reward: -425.858896484457  | Episode 847
| Reward: -575.8161331842948  | Episode 848
| Reward: -322.20567014036374  | Episode 849
| Reward: -534.4713686636915  | Episode 850
| Reward: -560.8821944883935  | Episode 851
| Reward: -446.44778299945347  | Episode 852
| Reward: -486.9977778222987  | Episode 853
| Reward: -568.3845134346574  | Episode 854
| Reward: -539.1573525826122  | Episode 855
| Reward: -323.3653897640342  | Episode 856
| Reward: -411.9655598149647  | Episode 857
| Reward: -470.550238413636  

| Reward: -479.1537488002705  | Episode 1021
| Reward: -433.9500852416695  | Episode 1022
| Reward: -433.00110321779977  | Episode 1023
| Reward: -469.05589485273504  | Episode 1024
| Reward: -290.4980432925238  | Episode 1025
| Reward: -188.59892444995012  | Episode 1026
| Reward: -103.71959774537247  | Episode 1027
| Reward: -287.62782100128425  | Episode 1028
| Reward: -181.63751546250316  | Episode 1029
| Reward: -434.24599640580357  | Episode 1030
| Reward: -495.31179133403157  | Episode 1031
| Reward: -418.19148356796205  | Episode 1032
| Reward: -367.59943481968884  | Episode 1033
| Reward: -255.16591724520808  | Episode 1034
| Reward: -510.4316746253661  | Episode 1035
| Reward: -527.4873891953094  | Episode 1036
| Reward: -312.93927193084784  | Episode 1037
| Reward: -383.98170065330737  | Episode 1038
| Reward: -661.0818274978955  | Episode 1039
| Reward: -394.81069516550355  | Episode 1040
| Reward: -343.15838862375836  | Episode 1041
| Reward: -79.72391436723119  | Episode 

| Reward: -530.6699426990276  | Episode 1203
| Reward: -419.188890010562  | Episode 1204
| Reward: -544.7698428918666  | Episode 1205
| Reward: -419.4208359755324  | Episode 1206
| Reward: -495.90963567444845  | Episode 1207
| Reward: -404.61242165758057  | Episode 1208
| Reward: -867.030382142659  | Episode 1209
| Reward: -482.8161552695999  | Episode 1210
| Reward: -429.4464582308731  | Episode 1211
| Reward: -471.9247606475808  | Episode 1212
| Reward: -555.9543317963994  | Episode 1213
| Reward: -475.27676276465706  | Episode 1214
| Reward: -409.1422408122896  | Episode 1215
| Reward: -782.4879417510394  | Episode 1216
| Reward: -540.0592437422649  | Episode 1217
| Reward: -553.7603088571398  | Episode 1218
| Reward: -658.6701613780679  | Episode 1219
| Reward: -458.4249549437383  | Episode 1220
| Reward: -507.1864867327747  | Episode 1221
| Reward: -376.20250324001614  | Episode 1222
| Reward: -535.6812328529963  | Episode 1223
| Reward: -541.782577399701  | Episode 1224
| Reward:

| Reward: -388.4299531506838  | Episode 1385
| Reward: -490.35344341313805  | Episode 1386
| Reward: -440.85672303830256  | Episode 1387
| Reward: -636.3782852562063  | Episode 1388
| Reward: -785.4402836793346  | Episode 1389
| Reward: -466.11165820291546  | Episode 1390
| Reward: -695.1197663676472  | Episode 1391
| Reward: -474.6990210581636  | Episode 1392
| Reward: -955.9841923210039  | Episode 1393
| Reward: -518.6125964742378  | Episode 1394
| Reward: -692.2870435335371  | Episode 1395
| Reward: -742.8106938052073  | Episode 1396
| Reward: -402.4762626728086  | Episode 1397
| Reward: -479.36083021701427  | Episode 1398
| Reward: -550.6143040647881  | Episode 1399
| Reward: -998.0869665075769  | Episode 1400
| Reward: -949.1614880423775  | Episode 1401
| Reward: -477.6855939608543  | Episode 1402
| Reward: -693.9599318257735  | Episode 1403
| Reward: -473.2257123124406  | Episode 1404
| Reward: -514.3910941451227  | Episode 1405
| Reward: -454.17915045025853  | Episode 1406
| Rew

| Reward: -423.82781231984484  | Episode 1567
| Reward: -685.9230060912366  | Episode 1568
| Reward: -683.9722151294058  | Episode 1569
| Reward: -558.0168129733636  | Episode 1570
| Reward: -595.6762861046655  | Episode 1571
| Reward: -797.0685537027997  | Episode 1572
| Reward: -647.3733929189465  | Episode 1573
| Reward: -705.9467853189012  | Episode 1574
| Reward: -691.2889967907058  | Episode 1575
| Reward: -473.9500192214142  | Episode 1576
| Reward: -775.5679883917744  | Episode 1577
| Reward: -522.5404766989587  | Episode 1578
| Reward: -514.3460700726887  | Episode 1579
| Reward: -561.6380098793243  | Episode 1580
| Reward: -641.4605115939955  | Episode 1581
| Reward: -637.9567204251119  | Episode 1582
| Reward: -478.10283475233683  | Episode 1583
| Reward: -620.4633027207849  | Episode 1584
| Reward: -704.0032754658187  | Episode 1585
| Reward: -290.4234780238094  | Episode 1586
| Reward: -828.0764298883245  | Episode 1587
| Reward: -767.8405727648172  | Episode 1588
| Reward

| Reward: -469.50097470958644  | Episode 1750
| Reward: -699.9145957415044  | Episode 1751
| Reward: -664.216126266865  | Episode 1752
| Reward: -901.5762597079249  | Episode 1753
| Reward: -736.9961491028639  | Episode 1754
| Reward: -603.8820999459285  | Episode 1755
| Reward: -317.3475834294038  | Episode 1756
| Reward: -451.03515481023476  | Episode 1757
| Reward: -534.5429825298638  | Episode 1758
| Reward: -572.4067664940826  | Episode 1759
| Reward: -375.67936329306144  | Episode 1760
| Reward: -929.6852950499764  | Episode 1761
| Reward: -566.7901014833463  | Episode 1762
| Reward: -529.047168833257  | Episode 1763
| Reward: -510.386844714582  | Episode 1764
| Reward: -501.22695793546603  | Episode 1765
| Reward: -480.37360932373394  | Episode 1766
| Reward: -657.6799609124497  | Episode 1767
| Reward: -640.9131253845698  | Episode 1768
| Reward: -672.4025017330827  | Episode 1769
| Reward: -498.2120371230038  | Episode 1770
| Reward: -585.5402286946908  | Episode 1771
| Reward

| Reward: -586.3173659237664  | Episode 1932
| Reward: -638.0083856806518  | Episode 1933
| Reward: -570.0057170023588  | Episode 1934
| Reward: -706.218937942726  | Episode 1935
| Reward: -568.0863748634972  | Episode 1936
| Reward: -516.7984732102337  | Episode 1937
| Reward: -541.3238016997202  | Episode 1938
| Reward: -669.4625045807707  | Episode 1939
| Reward: -454.0435356829689  | Episode 1940
| Reward: -549.9838406638346  | Episode 1941
| Reward: -680.2042608285785  | Episode 1942
| Reward: -578.2459122403529  | Episode 1943
| Reward: -586.3374114314602  | Episode 1944
| Reward: -757.7331358984155  | Episode 1945
| Reward: -651.8480733912623  | Episode 1946
| Reward: -520.0218630706427  | Episode 1947
| Reward: -747.6074887911286  | Episode 1948
| Reward: -748.1171523824925  | Episode 1949
| Reward: -673.0803785855816  | Episode 1950
| Reward: -776.6145481842502  | Episode 1951
| Reward: -663.6767463709642  | Episode 1952
| Reward: -412.1941010011052  | Episode 1953
| Reward: -

| Reward: -489.5816095921135  | Episode 2115
| Reward: -867.1554503645305  | Episode 2116
| Reward: -638.575153358304  | Episode 2117
| Reward: -643.9823262905065  | Episode 2118
| Reward: -447.50387523403214  | Episode 2119
| Reward: -977.6830447156004  | Episode 2120
| Reward: -552.8469516839976  | Episode 2121
| Reward: -445.1028061483629  | Episode 2122
| Reward: -528.166249508673  | Episode 2123
| Reward: -614.6119430825536  | Episode 2124
| Reward: -621.7908364993734  | Episode 2125
| Reward: -872.6874401123631  | Episode 2126
| Reward: -541.0173127014623  | Episode 2127
| Reward: -783.1586781398172  | Episode 2128
| Reward: -1179.5680588036205  | Episode 2129
| Reward: -682.3739200643182  | Episode 2130
| Reward: -708.9042091658498  | Episode 2131
| Reward: -714.1358290466997  | Episode 2132
| Reward: -889.2590080327313  | Episode 2133
| Reward: -1037.348101769672  | Episode 2134
| Reward: -596.7348718320118  | Episode 2135
| Reward: -598.8736813946757  | Episode 2136
| Reward: 

| Reward: -453.9213255194533  | Episode 2297
| Reward: -598.4980269030226  | Episode 2298
| Reward: -686.5030727523203  | Episode 2299
| Reward: -952.7491030236204  | Episode 2300
| Reward: -821.3107950259156  | Episode 2301
| Reward: -784.5654672883134  | Episode 2302
| Reward: -986.349973841604  | Episode 2303
| Reward: -532.7770299112078  | Episode 2304
| Reward: -378.56358682849464  | Episode 2305
| Reward: -550.0655387302941  | Episode 2306
| Reward: -802.2204442171496  | Episode 2307
| Reward: -719.6145428051882  | Episode 2308
| Reward: -498.42025655537054  | Episode 2309
| Reward: -589.8659453996927  | Episode 2310
| Reward: -487.6977674596761  | Episode 2311
| Reward: -479.51476631506176  | Episode 2312
| Reward: -435.9219457234655  | Episode 2313
| Reward: -624.27135402519  | Episode 2314
| Reward: -624.4321817269081  | Episode 2315
| Reward: -1117.6068274087604  | Episode 2316
| Reward: -723.7404601674508  | Episode 2317
| Reward: -375.85623339048993  | Episode 2318
| Reward

| Reward: -597.14310938275  | Episode 2479
| Reward: -435.0507415632846  | Episode 2480
| Reward: -808.505996945483  | Episode 2481
| Reward: -338.0841061520441  | Episode 2482
| Reward: -243.13189808747788  | Episode 2483
| Reward: -311.01989349823805  | Episode 2484
| Reward: -480.2526513108148  | Episode 2485
| Reward: -254.1020319658249  | Episode 2486
| Reward: -406.67230204773944  | Episode 2487
| Reward: -347.52541755189606  | Episode 2488
| Reward: -276.24662129052786  | Episode 2489
| Reward: -354.4770577212041  | Episode 2490
| Reward: -580.8765359780889  | Episode 2491
| Reward: -452.55368662164364  | Episode 2492
| Reward: -324.3924013195137  | Episode 2493
| Reward: -214.07388112392817  | Episode 2494
| Reward: -693.6684685595692  | Episode 2495
| Reward: -458.0587927062673  | Episode 2496
| Reward: -771.2784591469823  | Episode 2497
| Reward: -467.9495805189558  | Episode 2498
| Reward: -175.64614736763843  | Episode 2499
| Reward: -694.9100276273995  | Episode 2500
| Rew

| Reward: -414.8223439692012  | Episode 2661
| Reward: -720.0639116667841  | Episode 2662
| Reward: -670.3135492208464  | Episode 2663
| Reward: -631.5230312710352  | Episode 2664
| Reward: -657.6683955627198  | Episode 2665
| Reward: -636.0274631559599  | Episode 2666
| Reward: -370.78978605351415  | Episode 2667
| Reward: -646.9333039468165  | Episode 2668
| Reward: -642.57076824684  | Episode 2669
| Reward: -652.3892065857854  | Episode 2670
| Reward: -729.8011790546744  | Episode 2671
| Reward: -296.06194267196815  | Episode 2672
| Reward: -703.2167307898109  | Episode 2673
| Reward: -104.33210604420437  | Episode 2674
| Reward: -191.91958518248885  | Episode 2675
| Reward: -515.2056440081149  | Episode 2676
| Reward: -701.2042954162963  | Episode 2677
| Reward: -751.4802122308146  | Episode 2678
| Reward: -150.39381281583724  | Episode 2679
| Reward: -706.7639035213315  | Episode 2680
| Reward: -964.5034363251589  | Episode 2681
| Reward: -287.41259737842097  | Episode 2682
| Rewa

| Reward: -157.96552625852178  | Episode 2842
| Reward: -294.54128224961653  | Episode 2843
| Reward: -70.6455726813283  | Episode 2844
| Reward: -303.79617850062124  | Episode 2845
| Reward: -272.8170575514588  | Episode 2846
| Reward: -351.6736380400098  | Episode 2847
| Reward: -295.2223461445236  | Episode 2848
| Reward: -460.14977670869337  | Episode 2849
| Reward: -394.0945230462226  | Episode 2850
| Reward: -364.5737989863765  | Episode 2851
| Reward: -388.35582833643366  | Episode 2852
| Reward: -325.3036247881779  | Episode 2853
| Reward: -102.80859504490712  | Episode 2854
| Reward: -346.9540129254093  | Episode 2855
| Reward: -311.83326146571284  | Episode 2856
| Reward: -200.99775087247116  | Episode 2857
| Reward: -115.89519138324557  | Episode 2858
| Reward: -138.49441095746366  | Episode 2859
| Reward: -96.5118362168551  | Episode 2860
| Reward: -129.96921023316096  | Episode 2861
| Reward: -22.658787411701212  | Episode 2862
| Reward: -76.67010480762039  | Episode 2863


| Reward: -8.510696685976484  | Episode 3023
| Reward: -305.29035430446254  | Episode 3024
| Reward: -239.42846781176965  | Episode 3025
| Reward: -72.84917442804615  | Episode 3026
| Reward: -62.274491029467015  | Episode 3027
| Reward: 4.287122576086034  | Episode 3028
| Reward: -52.434707543616625  | Episode 3029
| Reward: -134.49158763186614  | Episode 3030
| Reward: -318.92618391458893  | Episode 3031
| Reward: -114.81219483829148  | Episode 3032
| Reward: -134.3503534558023  | Episode 3033
| Reward: -144.6366674358415  | Episode 3034
| Reward: -87.62226287325582  | Episode 3035
| Reward: -69.25885933369761  | Episode 3036
| Reward: -99.56397240654232  | Episode 3037
| Reward: -146.6202009781563  | Episode 3038
| Reward: -159.5812881418462  | Episode 3039
| Reward: -136.48169476467365  | Episode 3040
| Reward: -106.12563840432387  | Episode 3041
| Reward: -134.94175209279095  | Episode 3042
| Reward: -106.52153487410055  | Episode 3043
| Reward: -130.80987682229716  | Episode 3044

| Reward: 34.700523177726836  | Episode 3204
| Reward: -117.35472903870803  | Episode 3205
| Reward: -86.95251767392116  | Episode 3206
| Reward: -25.030283269493836  | Episode 3207
| Reward: -106.27499830487773  | Episode 3208
| Reward: -63.14642460360779  | Episode 3209
| Reward: -9.125571145186655  | Episode 3210
| Reward: -53.85365154649905  | Episode 3211
| Reward: -1.9144599356190497  | Episode 3212
| Reward: -99.21801001760238  | Episode 3213
| Reward: -102.61363633153147  | Episode 3214
| Reward: -78.59531717353039  | Episode 3215
| Reward: -63.90642072410983  | Episode 3216
| Reward: -114.66198126342294  | Episode 3217
| Reward: -134.04858264810292  | Episode 3218
| Reward: -40.562386318167285  | Episode 3219
| Reward: 6.208810742171181  | Episode 3220
| Reward: -127.62574096657765  | Episode 3221
| Reward: -189.04667333181555  | Episode 3222
| Reward: -104.05676089693057  | Episode 3223
| Reward: -53.439477934262015  | Episode 3224
| Reward: -232.93903127592313  | Episode 322