In [None]:
import argparse
import gym
import numpy as np
from itertools import count
from collections import namedtuple

from DDPG.ddpg import DDPG

# Training budget and stopping criterion used by main(): at most `episodes`
# episodes, stopping early once the 100-episode average reward beats
# `threshold`.
episodes = 100000
threshold = -200

# Total reward of each finished episode; main() appends one entry per episode.
reward_history = []

# Run-time switches read by main(): whether to render every step and how
# often (in episodes) to print a progress line.
args = dict(render=True, log_interval=1)

# Shared environment instance used by main() and by the summary prints.
env = gym.make('Pendulum-v0')


def main():
    """Train a DDPG agent on the module-level ``env``.

    Runs up to ``episodes`` episodes. Each episode's total reward is appended
    to the module-level ``reward_history``. Every ``args['log_interval']``
    episodes a progress line is printed. Training stops early once the mean
    reward over the last 100 episodes exceeds ``threshold`` and more than 100
    episodes have been played.
    """
    # Problem description handed to the agent; the sizes and bounds are
    # hard-coded here rather than read from ``env``.
    task = {
        'state_size': 3,
        'action_size': 1,
        'action_high': 2,
        'action_low': -2
    }
    agent = DDPG(task)
    for i_episode in range(episodes):
        running_reward = 0
        state = env.reset()
        for t in range(10000):  # Don't infinite loop while learning
            # The second value returned by act() is only used for logging.
            action, noise_coeff = agent.act(state, i_episode)
            state, reward, done, _ = env.step(action)
            # NOTE(review): `state` is already the post-step observation here;
            # presumably DDPG.step expects the next state and keeps track of
            # the previous one itself -- confirm against DDPG.ddpg.
            agent.step(action, reward, state, done)
            if args['render']:
                env.render()
            running_reward += reward
            if done:
                break

        reward_history.append(running_reward)

        # Refresh the moving average on every episode so the stopping test
        # below never reads a stale value when log_interval > 1.
        avg_reward = np.mean(reward_history[-100:])

        if i_episode % args['log_interval'] == 0:
            print('Episode {}\tLast length: {:5d}\t Reward: {:7.2f}\t Avg Reward: {:7.2f}\t Noise: {:.2f}'.format(
                i_episode, t, running_reward, avg_reward, noise_coeff))
        if avg_reward > threshold and i_episode > 100:
            print("Solved! Average 100-episode reward is now {}!".format(avg_reward))
            break
            
# Print a short description of the environment, then start training.
action_space_line = "action_space={}".format(env.action_space)
print(action_space_line)

obs_space_line = "obs_space={}".format(env.observation_space)
print(obs_space_line)

threshold_line = "threshold={} \n".format(env.spec.reward_threshold)
print(threshold_line)

main()



Using TensorFlow backend.


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
action_space=Box(1,)
obs_space=Box(3,)
threshold=None 

Episode 0	Last length:   199	 Reward: -1654.62	 Avg Reward: -1654.62	 Noise: 1.00
Episode 1	Last length:   199	 Reward: -1386.56	 Avg Reward: -1520.59	 Noise: 1.00
Episode 2	Last length:   199	 Reward: -1541.61	 Avg Reward: -1527.60	 Noise: 1.00
Episode 3	Last length:   199	 Reward: -1111.83	 Avg Reward: -1423.66	 Noise: 1.00
Episode 4	Last length:   199	 Reward: -1446.26	 Avg Reward: -1428.18	 Noise: 1.00
Episode 5	Last length:   199	 Reward: -1513.44	 Avg Reward: -1442.39	 Noise: 1.00
Episode 6	Last length:   199	 Reward: -1601.36	 Avg Reward: -1465.10	 Noise: 1.00
Episode 7	Last length:   199	 Reward: -1351.91	 Avg Reward: -1450.95	 Noise: 1.00
Episode 8	Last length:   199	 Reward: -1578.15	 Avg Reward: -1465.08	 No

Episode 96	Last length:   199	 Reward: -1548.21	 Avg Reward: -1422.45	 Noise: 0.97
Episode 97	Last length:   199	 Reward: -1567.14	 Avg Reward: -1423.92	 Noise: 0.97
Episode 98	Last length:   199	 Reward: -1507.23	 Avg Reward: -1424.77	 Noise: 0.97
Episode 99	Last length:   199	 Reward: -1530.70	 Avg Reward: -1425.83	 Noise: 0.97
Episode 100	Last length:   199	 Reward: -1524.17	 Avg Reward: -1424.52	 Noise: 0.97
Episode 101	Last length:   199	 Reward: -1532.79	 Avg Reward: -1425.98	 Noise: 0.97
Episode 102	Last length:   199	 Reward: -1582.39	 Avg Reward: -1426.39	 Noise: 0.97
Episode 103	Last length:   199	 Reward: -1511.96	 Avg Reward: -1430.39	 Noise: 0.97
Episode 104	Last length:   199	 Reward: -1619.83	 Avg Reward: -1432.13	 Noise: 0.97
Episode 105	Last length:   199	 Reward: -1632.49	 Avg Reward: -1433.32	 Noise: 0.96
Episode 106	Last length:   199	 Reward: -1614.12	 Avg Reward: -1433.45	 Noise: 0.96
Episode 107	Last length:   199	 Reward: -1564.34	 Avg Reward: -1435.57	 Noise: 0

Episode 196	Last length:   199	 Reward: -128.50	 Avg Reward: -495.35	 Noise: 0.93
Episode 197	Last length:   199	 Reward: -648.02	 Avg Reward: -486.16	 Noise: 0.93
Episode 198	Last length:   199	 Reward: -125.26	 Avg Reward: -472.34	 Noise: 0.93
Episode 199	Last length:   199	 Reward: -253.11	 Avg Reward: -459.56	 Noise: 0.93
Episode 200	Last length:   199	 Reward: -125.52	 Avg Reward: -445.58	 Noise: 0.93
Episode 201	Last length:   199	 Reward: -129.30	 Avg Reward: -431.54	 Noise: 0.93
Episode 202	Last length:   199	 Reward: -121.00	 Avg Reward: -416.93	 Noise: 0.93
Episode 203	Last length:   199	 Reward: -125.58	 Avg Reward: -403.06	 Noise: 0.93
Episode 204	Last length:   199	 Reward: -119.83	 Avg Reward: -388.06	 Noise: 0.93
Episode 205	Last length:   199	 Reward: -250.40	 Avg Reward: -374.24	 Noise: 0.93
Episode 206	Last length:   199	 Reward: -379.84	 Avg Reward: -361.90	 Noise: 0.93
Episode 207	Last length:   199	 Reward: -139.64	 Avg Reward: -347.65	 Noise: 0.93
Episode 208	Last

Episode 296	Last length:   199	 Reward:   -5.52	 Avg Reward: -529.79	 Noise: 0.90
Episode 297	Last length:   199	 Reward: -1577.57	 Avg Reward: -539.08	 Noise: 0.90
Episode 298	Last length:   199	 Reward: -1335.19	 Avg Reward: -551.18	 Noise: 0.90
Episode 299	Last length:   199	 Reward: -1215.08	 Avg Reward: -560.80	 Noise: 0.90
Episode 300	Last length:   199	 Reward: -1267.14	 Avg Reward: -572.22	 Noise: 0.90
Episode 301	Last length:   199	 Reward: -1039.88	 Avg Reward: -581.32	 Noise: 0.90
Episode 302	Last length:   199	 Reward: -1534.66	 Avg Reward: -595.46	 Noise: 0.90
Episode 303	Last length:   199	 Reward: -471.87	 Avg Reward: -598.92	 Noise: 0.90
Episode 304	Last length:   199	 Reward: -383.43	 Avg Reward: -601.56	 Noise: 0.90
Episode 305	Last length:   199	 Reward:   -7.43	 Avg Reward: -599.13	 Noise: 0.90
Episode 306	Last length:   199	 Reward: -365.66	 Avg Reward: -598.99	 Noise: 0.90
Episode 307	Last length:   199	 Reward: -917.37	 Avg Reward: -606.76	 Noise: 0.90
Episode 30

Episode 396	Last length:   199	 Reward: -237.31	 Avg Reward: -369.74	 Noise: 0.87
Episode 397	Last length:   199	 Reward: -135.12	 Avg Reward: -355.32	 Noise: 0.87
Episode 398	Last length:   199	 Reward: -137.79	 Avg Reward: -343.35	 Noise: 0.87
Episode 399	Last length:   199	 Reward: -264.63	 Avg Reward: -333.84	 Noise: 0.87
Episode 400	Last length:   199	 Reward:   -7.18	 Avg Reward: -321.24	 Noise: 0.87
Episode 401	Last length:   199	 Reward: -127.95	 Avg Reward: -312.12	 Noise: 0.87
Episode 402	Last length:   199	 Reward: -263.29	 Avg Reward: -299.41	 Noise: 0.87
Episode 403	Last length:   199	 Reward: -368.60	 Avg Reward: -298.38	 Noise: 0.87
Episode 404	Last length:   199	 Reward: -129.74	 Avg Reward: -295.84	 Noise: 0.87
Episode 405	Last length:   199	 Reward: -132.44	 Avg Reward: -297.09	 Noise: 0.86
Episode 406	Last length:   199	 Reward: -132.02	 Avg Reward: -294.75	 Noise: 0.86
Episode 407	Last length:   199	 Reward: -262.97	 Avg Reward: -288.21	 Noise: 0.86
Episode 408	Last

Episode 496	Last length:   199	 Reward: -262.46	 Avg Reward: -296.98	 Noise: 0.83
Episode 497	Last length:   199	 Reward: -261.30	 Avg Reward: -298.25	 Noise: 0.83
Episode 498	Last length:   199	 Reward: -134.73	 Avg Reward: -298.22	 Noise: 0.83
Episode 499	Last length:   199	 Reward: -601.39	 Avg Reward: -301.58	 Noise: 0.83
Episode 500	Last length:   199	 Reward: -391.17	 Avg Reward: -305.42	 Noise: 0.83
Episode 501	Last length:   199	 Reward: -380.98	 Avg Reward: -307.95	 Noise: 0.83
Episode 502	Last length:   199	 Reward: -415.65	 Avg Reward: -309.48	 Noise: 0.83
Episode 503	Last length:   199	 Reward: -138.07	 Avg Reward: -307.17	 Noise: 0.83
Episode 504	Last length:   199	 Reward: -739.67	 Avg Reward: -313.27	 Noise: 0.83
Episode 505	Last length:   199	 Reward: -619.91	 Avg Reward: -318.15	 Noise: 0.83
Episode 506	Last length:   199	 Reward: -386.78	 Avg Reward: -320.69	 Noise: 0.83
Episode 507	Last length:   199	 Reward: -383.89	 Avg Reward: -321.90	 Noise: 0.83
Episode 508	Last

Episode 596	Last length:   199	 Reward: -262.46	 Avg Reward: -548.75	 Noise: 0.80
Episode 597	Last length:   199	 Reward: -397.19	 Avg Reward: -550.11	 Noise: 0.80
Episode 598	Last length:   199	 Reward: -138.89	 Avg Reward: -550.15	 Noise: 0.80
Episode 599	Last length:   199	 Reward: -1173.57	 Avg Reward: -555.87	 Noise: 0.80
Episode 600	Last length:   199	 Reward: -233.30	 Avg Reward: -554.29	 Noise: 0.80
Episode 601	Last length:   199	 Reward: -1205.04	 Avg Reward: -562.54	 Noise: 0.80
Episode 602	Last length:   199	 Reward: -703.21	 Avg Reward: -565.41	 Noise: 0.80
Episode 603	Last length:   199	 Reward: -273.45	 Avg Reward: -566.76	 Noise: 0.80
Episode 604	Last length:   199	 Reward: -400.30	 Avg Reward: -563.37	 Noise: 0.80
Episode 605	Last length:   199	 Reward: -271.18	 Avg Reward: -559.88	 Noise: 0.80
Episode 606	Last length:   199	 Reward: -150.84	 Avg Reward: -557.52	 Noise: 0.80
Episode 607	Last length:   199	 Reward: -399.47	 Avg Reward: -557.68	 Noise: 0.80
Episode 608	La

Episode 696	Last length:   199	 Reward: -1498.93	 Avg Reward: -502.98	 Noise: 0.77
Episode 697	Last length:   199	 Reward: -931.15	 Avg Reward: -508.32	 Noise: 0.77
Episode 698	Last length:   199	 Reward: -904.02	 Avg Reward: -515.97	 Noise: 0.77
Episode 699	Last length:   199	 Reward: -1525.82	 Avg Reward: -519.49	 Noise: 0.77
Episode 700	Last length:   199	 Reward: -937.08	 Avg Reward: -526.53	 Noise: 0.77
Episode 701	Last length:   199	 Reward: -937.45	 Avg Reward: -523.86	 Noise: 0.77
Episode 702	Last length:   199	 Reward: -855.95	 Avg Reward: -525.38	 Noise: 0.77
Episode 703	Last length:   199	 Reward: -933.54	 Avg Reward: -531.98	 Noise: 0.77
Episode 704	Last length:   199	 Reward: -748.28	 Avg Reward: -535.46	 Noise: 0.77
Episode 705	Last length:   199	 Reward: -833.00	 Avg Reward: -541.08	 Noise: 0.77
Episode 706	Last length:   199	 Reward: -755.49	 Avg Reward: -547.13	 Noise: 0.76
Episode 707	Last length:   199	 Reward: -767.48	 Avg Reward: -550.81	 Noise: 0.76
Episode 708	La

Episode 796	Last length:   199	 Reward: -630.15	 Avg Reward: -849.11	 Noise: 0.73
Episode 797	Last length:   199	 Reward: -633.96	 Avg Reward: -846.14	 Noise: 0.73
Episode 798	Last length:   199	 Reward: -629.87	 Avg Reward: -843.40	 Noise: 0.73
Episode 799	Last length:   199	 Reward: -626.14	 Avg Reward: -834.40	 Noise: 0.73
Episode 800	Last length:   199	 Reward: -736.43	 Avg Reward: -832.39	 Noise: 0.73
Episode 801	Last length:   199	 Reward: -628.24	 Avg Reward: -829.30	 Noise: 0.73
Episode 802	Last length:   199	 Reward: -690.57	 Avg Reward: -827.65	 Noise: 0.73
Episode 803	Last length:   199	 Reward: -743.36	 Avg Reward: -825.74	 Noise: 0.73
Episode 804	Last length:   199	 Reward: -636.28	 Avg Reward: -824.62	 Noise: 0.73
Episode 805	Last length:   199	 Reward: -835.31	 Avg Reward: -824.65	 Noise: 0.73
Episode 806	Last length:   199	 Reward: -626.34	 Avg Reward: -823.36	 Noise: 0.73
Episode 807	Last length:   199	 Reward: -624.89	 Avg Reward: -821.93	 Noise: 0.73
Episode 808	Last

Episode 896	Last length:   199	 Reward: -1067.69	 Avg Reward: -931.83	 Noise: 0.70
Episode 897	Last length:   199	 Reward: -1039.86	 Avg Reward: -935.89	 Noise: 0.70
Episode 898	Last length:   199	 Reward: -1532.74	 Avg Reward: -944.92	 Noise: 0.70
Episode 899	Last length:   199	 Reward: -1298.14	 Avg Reward: -951.64	 Noise: 0.70
Episode 900	Last length:   199	 Reward: -1224.81	 Avg Reward: -956.52	 Noise: 0.70
Episode 901	Last length:   199	 Reward: -1165.21	 Avg Reward: -961.89	 Noise: 0.70
Episode 902	Last length:   199	 Reward: -1041.53	 Avg Reward: -965.40	 Noise: 0.70
Episode 903	Last length:   199	 Reward: -1286.10	 Avg Reward: -970.83	 Noise: 0.70
Episode 904	Last length:   199	 Reward: -1179.55	 Avg Reward: -976.26	 Noise: 0.70
Episode 905	Last length:   199	 Reward: -1517.92	 Avg Reward: -983.09	 Noise: 0.70
Episode 906	Last length:   199	 Reward: -1264.12	 Avg Reward: -989.47	 Noise: 0.70
Episode 907	Last length:   199	 Reward: -1174.59	 Avg Reward: -994.96	 Noise: 0.70
Epis

Episode 996	Last length:   199	 Reward: -265.71	 Avg Reward: -624.42	 Noise: 0.67
Episode 997	Last length:   199	 Reward: -848.83	 Avg Reward: -622.51	 Noise: 0.67
Episode 998	Last length:   199	 Reward: -746.82	 Avg Reward: -614.66	 Noise: 0.67
Episode 999	Last length:   199	 Reward: -383.39	 Avg Reward: -605.51	 Noise: 0.67
Episode 1000	Last length:   199	 Reward: -558.51	 Avg Reward: -598.84	 Noise: 0.67
Episode 1001	Last length:   199	 Reward: -605.49	 Avg Reward: -593.25	 Noise: 0.67
Episode 1002	Last length:   199	 Reward: -770.02	 Avg Reward: -590.53	 Noise: 0.67
Episode 1003	Last length:   199	 Reward: -238.24	 Avg Reward: -580.05	 Noise: 0.67
Episode 1004	Last length:   199	 Reward: -445.37	 Avg Reward: -572.71	 Noise: 0.67
Episode 1005	Last length:   199	 Reward: -618.81	 Avg Reward: -563.72	 Noise: 0.67
Episode 1006	Last length:   199	 Reward: -608.64	 Avg Reward: -557.17	 Noise: 0.66
Episode 1007	Last length:   199	 Reward: -334.30	 Avg Reward: -548.76	 Noise: 0.66
Episode 

Episode 1095	Last length:   199	 Reward: -123.58	 Avg Reward: -664.36	 Noise: 0.64
Episode 1096	Last length:   199	 Reward:   -1.69	 Avg Reward: -661.72	 Noise: 0.63
Episode 1097	Last length:   199	 Reward: -119.83	 Avg Reward: -654.43	 Noise: 0.63
Episode 1098	Last length:   199	 Reward:   -1.19	 Avg Reward: -646.97	 Noise: 0.63
Episode 1099	Last length:   199	 Reward: -121.43	 Avg Reward: -644.35	 Noise: 0.63
Episode 1100	Last length:   199	 Reward: -1527.52	 Avg Reward: -654.04	 Noise: 0.63
Episode 1101	Last length:   199	 Reward: -688.51	 Avg Reward: -654.87	 Noise: 0.63
Episode 1102	Last length:   199	 Reward: -350.39	 Avg Reward: -650.67	 Noise: 0.63
Episode 1103	Last length:   199	 Reward: -126.32	 Avg Reward: -649.55	 Noise: 0.63
Episode 1104	Last length:   199	 Reward: -123.44	 Avg Reward: -646.34	 Noise: 0.63
Episode 1105	Last length:   199	 Reward: -234.68	 Avg Reward: -642.49	 Noise: 0.63
Episode 1106	Last length:   199	 Reward: -126.98	 Avg Reward: -637.68	 Noise: 0.63
Epi