In [1]:
import argparse
import gym
import numpy as np
from itertools import count
from collections import namedtuple

from DDPG.ddpg import DDPG

# Run-time switches read by main(): `render` draws the environment after every
# step, `log_interval` is the episode stride between progress lines.
args = dict(render=True, log_interval=1)

# Environment instance shared by the whole cell.
env = gym.make('Pendulum-v0')

episodes = 100000    # upper bound on the number of training episodes
reward_history = []  # summed reward of each finished episode, in order
threshold = -300     # 100-episode average reward above which main() stops


def main():
    """Train a DDPG agent on the module-level ``env`` and print progress.

    A ``DDPG`` agent is built from a hard-coded task description
    (``state_size`` 3, ``action_size`` 1, ``action_low``/``action_high`` of
    -2/2).  Up to ``episodes`` episodes are run, each capped at 10000 steps.
    The summed reward of every episode is appended to the module-level
    ``reward_history``.

    Training stops early once the mean of the last 100 entries of
    ``reward_history`` exceeds ``threshold`` and the episode index is above
    100.  The mean is recomputed after every episode, so the stop condition
    does not depend on ``args['log_interval']``; only the progress line does.

    Reads the module-level names ``env``, ``episodes``, ``reward_history``,
    ``threshold`` and ``args``.  Returns ``None``.
    """
    task = {
        'state_size': 3,
        'action_size': 1,
        'action_high': 2,
        'action_low': -2
    }
    agent = DDPG(task)
    for i_episode in range(episodes):
        running_reward = 0
        state = env.reset()
        for t in range(10000):  # Don't infinite loop while learning
            action, noise_coeff = agent.act(state, i_episode)
            state, reward, done, _ = env.step(action)
            # NOTE(review): `state` is already the post-step observation here;
            # confirm that DDPG.step expects the next state in this position.
            agent.step(action, reward, state, done)
            if args['render']:
                env.render()
            running_reward += reward
            if done:
                break

        reward_history.append(running_reward)

        # Recomputed every episode: the solved check below must always see the
        # current 100-episode average, even when logging is less frequent.
        avg_reward = np.mean(reward_history[-100:])

        if i_episode % args['log_interval'] == 0:
            # `t` is the zero-based index of the final step, so the value shown
            # as "Last length" is one less than the number of steps taken.
            print('Episode {}\tLast length: {:5d}\t Reward: {:7.2f}\t Avg Reward: {:7.2f}\t Noise: {:.2f}'.format(
                i_episode, t, running_reward, avg_reward, noise_coeff))
        if avg_reward > threshold and i_episode > 100:
            print("Solved! Average 100-episode reward is now {}!".format(avg_reward))
            break
            
# Report the environment's action/observation spaces and the reward threshold
# registered in its spec, then start training.  The recorded run prints
# `threshold=None`, i.e. the spec carries no threshold, which is why main()
# compares against the module-level `threshold` constant instead.
print("action_space={}".format(env.action_space))
print("obs_space={}".format(env.observation_space))
print("threshold={} \n".format(env.spec.reward_threshold))
# Runs the whole training loop; with `render` enabled this also draws each step.
main()



Using TensorFlow backend.


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
action_space=Box(1,)
obs_space=Box(3,)
threshold=None 

Episode 0	Last length:   199	 Reward: -1349.14	 Avg Reward: -1349.14	 Noise: 1.00
Episode 1	Last length:   199	 Reward: -1382.71	 Avg Reward: -1365.92	 Noise: 1.00
Episode 2	Last length:   199	 Reward: -1398.51	 Avg Reward: -1376.79	 Noise: 1.00
Episode 3	Last length:   199	 Reward: -1358.64	 Avg Reward: -1372.25	 Noise: 1.00
Episode 4	Last length:   199	 Reward: -1129.20	 Avg Reward: -1323.64	 Noise: 1.00
Episode 5	Last length:   199	 Reward: -1116.53	 Avg Reward: -1289.12	 Noise: 1.00
Episode 6	Last length:   199	 Reward: -1402.09	 Avg Reward: -1305.26	 Noise: 1.00
Episode 7	Last length:   199	 Reward: -1109.13	 Avg Reward: -1280.74	 Noise: 1.00
Episode 8	Last length:   199	 Reward: -1338.98	 Avg Reward: -1287.21	 No

Episode 96	Last length:   199	 Reward: -1367.09	 Avg Reward: -1356.36	 Noise: 0.97
Episode 97	Last length:   199	 Reward: -1343.45	 Avg Reward: -1356.23	 Noise: 0.97
Episode 98	Last length:   199	 Reward: -1306.47	 Avg Reward: -1355.72	 Noise: 0.97
Episode 99	Last length:   199	 Reward: -1311.52	 Avg Reward: -1355.28	 Noise: 0.97
Episode 100	Last length:   199	 Reward: -1279.91	 Avg Reward: -1354.59	 Noise: 0.97
Episode 101	Last length:   199	 Reward: -1291.39	 Avg Reward: -1353.68	 Noise: 0.97
Episode 102	Last length:   199	 Reward: -1285.70	 Avg Reward: -1352.55	 Noise: 0.97
Episode 103	Last length:   199	 Reward: -1171.24	 Avg Reward: -1350.67	 Noise: 0.97
Episode 104	Last length:   199	 Reward: -1062.17	 Avg Reward: -1350.00	 Noise: 0.97
Episode 105	Last length:   199	 Reward: -1117.94	 Avg Reward: -1350.02	 Noise: 0.96
Episode 106	Last length:   199	 Reward: -1546.57	 Avg Reward: -1351.46	 Noise: 0.96
Episode 107	Last length:   199	 Reward: -1176.45	 Avg Reward: -1352.14	 Noise: 0

Episode 194	Last length:   199	 Reward: -1495.03	 Avg Reward: -1402.24	 Noise: 0.94
Episode 195	Last length:   199	 Reward: -1041.54	 Avg Reward: -1397.37	 Noise: 0.94
Episode 196	Last length:   199	 Reward: -752.87	 Avg Reward: -1391.23	 Noise: 0.93
Episode 197	Last length:   199	 Reward: -826.96	 Avg Reward: -1386.06	 Noise: 0.93
Episode 198	Last length:   199	 Reward: -679.01	 Avg Reward: -1379.79	 Noise: 0.93
Episode 199	Last length:   199	 Reward: -383.06	 Avg Reward: -1370.50	 Noise: 0.93
Episode 200	Last length:   199	 Reward: -279.28	 Avg Reward: -1360.50	 Noise: 0.93
Episode 201	Last length:   199	 Reward: -677.46	 Avg Reward: -1354.36	 Noise: 0.93
Episode 202	Last length:   199	 Reward: -519.54	 Avg Reward: -1346.70	 Noise: 0.93
Episode 203	Last length:   199	 Reward: -270.74	 Avg Reward: -1337.69	 Noise: 0.93
Episode 204	Last length:   199	 Reward: -135.33	 Avg Reward: -1328.42	 Noise: 0.93
Episode 205	Last length:   199	 Reward: -529.09	 Avg Reward: -1322.54	 Noise: 0.93
Ep

Episode 293	Last length:   199	 Reward:  -11.96	 Avg Reward: -930.07	 Noise: 0.90
Episode 294	Last length:   199	 Reward: -1406.90	 Avg Reward: -929.19	 Noise: 0.90
Episode 295	Last length:   199	 Reward: -140.08	 Avg Reward: -920.17	 Noise: 0.90
Episode 296	Last length:   199	 Reward: -1410.46	 Avg Reward: -926.75	 Noise: 0.90
Episode 297	Last length:   199	 Reward: -1421.36	 Avg Reward: -932.69	 Noise: 0.90
Episode 298	Last length:   199	 Reward: -1399.92	 Avg Reward: -939.90	 Noise: 0.90
Episode 299	Last length:   199	 Reward: -1442.58	 Avg Reward: -950.50	 Noise: 0.90
Episode 300	Last length:   199	 Reward: -1493.98	 Avg Reward: -962.64	 Noise: 0.90
Episode 301	Last length:   199	 Reward: -1489.60	 Avg Reward: -970.76	 Noise: 0.90
Episode 302	Last length:   199	 Reward:  -18.25	 Avg Reward: -965.75	 Noise: 0.90
Episode 303	Last length:   199	 Reward: -1311.72	 Avg Reward: -976.16	 Noise: 0.90
Episode 304	Last length:   199	 Reward: -863.15	 Avg Reward: -983.44	 Noise: 0.90
Episode 

Episode 393	Last length:   199	 Reward:  -10.44	 Avg Reward: -570.45	 Noise: 0.87
Episode 394	Last length:   199	 Reward:   -7.26	 Avg Reward: -556.45	 Noise: 0.87
Episode 395	Last length:   199	 Reward: -813.44	 Avg Reward: -563.19	 Noise: 0.87
Episode 396	Last length:   199	 Reward: -476.25	 Avg Reward: -553.84	 Noise: 0.87
Episode 397	Last length:   199	 Reward: -611.49	 Avg Reward: -545.74	 Noise: 0.87
Episode 398	Last length:   199	 Reward: -758.02	 Avg Reward: -539.33	 Noise: 0.87
Episode 399	Last length:   199	 Reward: -126.59	 Avg Reward: -526.17	 Noise: 0.87
Episode 400	Last length:   199	 Reward: -628.51	 Avg Reward: -517.51	 Noise: 0.87
Episode 401	Last length:   199	 Reward: -499.07	 Avg Reward: -507.61	 Noise: 0.87
Episode 402	Last length:   199	 Reward: -721.87	 Avg Reward: -514.64	 Noise: 0.87
Episode 403	Last length:   199	 Reward: -367.50	 Avg Reward: -505.20	 Noise: 0.87
Episode 404	Last length:   199	 Reward:  -11.48	 Avg Reward: -496.68	 Noise: 0.87
Episode 405	Last

Episode 493	Last length:   199	 Reward: -133.78	 Avg Reward: -330.06	 Noise: 0.84
Episode 494	Last length:   199	 Reward: -373.70	 Avg Reward: -333.72	 Noise: 0.84
Episode 495	Last length:   199	 Reward: -369.13	 Avg Reward: -329.28	 Noise: 0.83
Episode 496	Last length:   199	 Reward:  -13.81	 Avg Reward: -324.66	 Noise: 0.83
Episode 497	Last length:   199	 Reward: -258.67	 Avg Reward: -321.13	 Noise: 0.83
Episode 498	Last length:   199	 Reward: -254.56	 Avg Reward: -316.09	 Noise: 0.83
Episode 499	Last length:   199	 Reward: -136.86	 Avg Reward: -316.20	 Noise: 0.83
Episode 500	Last length:   199	 Reward: -257.25	 Avg Reward: -312.48	 Noise: 0.83
Episode 501	Last length:   199	 Reward: -239.06	 Avg Reward: -309.88	 Noise: 0.83
Episode 502	Last length:   199	 Reward: -262.15	 Avg Reward: -305.29	 Noise: 0.83
Episode 503	Last length:   199	 Reward: -252.05	 Avg Reward: -304.13	 Noise: 0.83
Episode 504	Last length:   199	 Reward: -136.37	 Avg Reward: -305.38	 Noise: 0.83
Episode 505	Last