In [1]:
import argparse
import gym
import numpy as np
from itertools import count
from collections import namedtuple

from DDPG.ddpg import DDPG

args = {
    'render': True,
    'log_interval': 1
}

env = gym.make('Pendulum-v0')
episodes = 100000
reward_history = []
threshold = -200


def main():   
    task = {
        'state_size': 3,
        'action_size': 1,
        'action_high': 2,
        'action_low': -2
    }
    agent = DDPG(task)    
    for i_episode in range(episodes):
        running_reward = 0        
        state = env.reset()
        for t in range(10000):  # Don't infinite loop while learning
            action, noise_coeff = agent.act(state, i_episode)                
            state, reward, done, _ = env.step(action)  
            agent.step(action, reward, state, done)
            if args['render']:
                env.render()                   
            running_reward += reward            
            if done:
                break
        
        reward_history.append(running_reward)
        
        if i_episode % args['log_interval'] == 0:
            avg_reward = np.mean(reward_history[-100:])            
            print('Episode {}\tLast length: {:5d}\t Reward: {:7.2f}\t Avg Reward: {:7.2f}\t Noise: {:.2f}'.format(
                i_episode, t, running_reward, avg_reward, noise_coeff))
        if avg_reward > threshold and i_episode > 100:
            print("Solved! Average 100-episode reward is now {}!".format(avg_reward))
            break
            
print("action_space={}".format(env.action_space))
print("obs_space={}".format(env.observation_space))
print("threshold={} \n".format(env.spec.reward_threshold))
main()



Using TensorFlow backend.


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
action_space=Box(1,)
obs_space=Box(3,)
threshold=None 

Episode 0	Last length:   199	 Reward: -1449.32	 Avg Reward: -1449.32	 Noise: 1.00
Episode 1	Last length:   199	 Reward: -1048.83	 Avg Reward: -1249.07	 Noise: 1.00
Episode 2	Last length:   199	 Reward: -1016.69	 Avg Reward: -1171.61	 Noise: 1.00
Episode 3	Last length:   199	 Reward: -1429.91	 Avg Reward: -1236.19	 Noise: 1.00
Episode 4	Last length:   199	 Reward: -1243.71	 Avg Reward: -1237.69	 Noise: 1.00
Episode 5	Last length:   199	 Reward: -1588.28	 Avg Reward: -1296.12	 Noise: 1.00
Episode 6	Last length:   199	 Reward: -1590.12	 Avg Reward: -1338.12	 Noise: 1.00
Episode 7	Last length:   199	 Reward: -1861.15	 Avg Reward: -1403.50	 Noise: 1.00
Episode 8	Last length:   199	 Reward: -1474.91	 Avg Reward: -1411.44	 No

Episode 96	Last length:   199	 Reward: -1143.20	 Avg Reward: -1170.59	 Noise: 0.97
Episode 97	Last length:   199	 Reward: -1073.78	 Avg Reward: -1169.60	 Noise: 0.97
Episode 98	Last length:   199	 Reward: -1077.61	 Avg Reward: -1168.67	 Noise: 0.97
Episode 99	Last length:   199	 Reward: -1176.54	 Avg Reward: -1168.75	 Noise: 0.97
Episode 100	Last length:   199	 Reward: -1115.52	 Avg Reward: -1165.41	 Noise: 0.97
Episode 101	Last length:   199	 Reward: -966.27	 Avg Reward: -1164.59	 Noise: 0.97
Episode 102	Last length:   199	 Reward: -987.40	 Avg Reward: -1164.29	 Noise: 0.97
Episode 103	Last length:   199	 Reward: -1012.32	 Avg Reward: -1160.12	 Noise: 0.97
Episode 104	Last length:   199	 Reward: -993.48	 Avg Reward: -1157.62	 Noise: 0.97
Episode 105	Last length:   199	 Reward: -982.36	 Avg Reward: -1151.56	 Noise: 0.96
Episode 106	Last length:   199	 Reward: -1033.73	 Avg Reward: -1145.99	 Noise: 0.96
Episode 107	Last length:   199	 Reward: -999.30	 Avg Reward: -1137.38	 Noise: 0.96
E

Episode 196	Last length:   199	 Reward: -971.54	 Avg Reward: -646.07	 Noise: 0.93
Episode 197	Last length:   199	 Reward: -550.24	 Avg Reward: -640.83	 Noise: 0.93
Episode 198	Last length:   199	 Reward: -1062.55	 Avg Reward: -640.68	 Noise: 0.93
Episode 199	Last length:   199	 Reward: -1155.07	 Avg Reward: -640.47	 Noise: 0.93
Episode 200	Last length:   199	 Reward: -1267.78	 Avg Reward: -641.99	 Noise: 0.93
Episode 201	Last length:   199	 Reward: -935.90	 Avg Reward: -641.69	 Noise: 0.93
Episode 202	Last length:   199	 Reward: -1072.55	 Avg Reward: -642.54	 Noise: 0.93
Episode 203	Last length:   199	 Reward: -1573.74	 Avg Reward: -648.15	 Noise: 0.93
Episode 204	Last length:   199	 Reward: -1206.89	 Avg Reward: -650.29	 Noise: 0.93
Episode 205	Last length:   199	 Reward: -1050.85	 Avg Reward: -650.97	 Noise: 0.93
Episode 206	Last length:   199	 Reward: -773.18	 Avg Reward: -648.37	 Noise: 0.93
Episode 207	Last length:   199	 Reward: -262.58	 Avg Reward: -641.00	 Noise: 0.93
Episode 2

Episode 296	Last length:   199	 Reward: -260.27	 Avg Reward: -478.37	 Noise: 0.90
Episode 297	Last length:   199	 Reward: -134.04	 Avg Reward: -474.21	 Noise: 0.90
Episode 298	Last length:   199	 Reward: -262.04	 Avg Reward: -466.20	 Noise: 0.90
Episode 299	Last length:   199	 Reward: -763.30	 Avg Reward: -462.28	 Noise: 0.90
Episode 300	Last length:   199	 Reward: -261.69	 Avg Reward: -452.22	 Noise: 0.90
Episode 301	Last length:   199	 Reward: -502.21	 Avg Reward: -447.89	 Noise: 0.90
Episode 302	Last length:   199	 Reward: -501.30	 Avg Reward: -442.17	 Noise: 0.90
Episode 303	Last length:   199	 Reward: -257.56	 Avg Reward: -429.01	 Noise: 0.90
Episode 304	Last length:   199	 Reward: -248.00	 Avg Reward: -419.42	 Noise: 0.90
Episode 305	Last length:   199	 Reward: -133.21	 Avg Reward: -410.25	 Noise: 0.90
Episode 306	Last length:   199	 Reward: -131.24	 Avg Reward: -403.83	 Noise: 0.90
Episode 307	Last length:   199	 Reward: -490.10	 Avg Reward: -406.10	 Noise: 0.90
Episode 308	Last

Episode 396	Last length:   199	 Reward: -128.67	 Avg Reward: -345.02	 Noise: 0.87
Episode 397	Last length:   199	 Reward: -121.61	 Avg Reward: -344.90	 Noise: 0.87
Episode 398	Last length:   199	 Reward: -127.53	 Avg Reward: -343.55	 Noise: 0.87
Episode 399	Last length:   199	 Reward:   -6.57	 Avg Reward: -335.99	 Noise: 0.87
Episode 400	Last length:   199	 Reward: -252.66	 Avg Reward: -335.90	 Noise: 0.87
Episode 401	Last length:   199	 Reward: -266.21	 Avg Reward: -333.54	 Noise: 0.87
Episode 402	Last length:   199	 Reward: -507.35	 Avg Reward: -333.60	 Noise: 0.87
Episode 403	Last length:   199	 Reward: -130.65	 Avg Reward: -332.33	 Noise: 0.87
Episode 404	Last length:   199	 Reward: -130.12	 Avg Reward: -331.15	 Noise: 0.87
Episode 405	Last length:   199	 Reward: -327.77	 Avg Reward: -333.09	 Noise: 0.86
Episode 406	Last length:   199	 Reward: -280.29	 Avg Reward: -334.58	 Noise: 0.86
Episode 407	Last length:   199	 Reward: -127.86	 Avg Reward: -330.96	 Noise: 0.86
Episode 408	Last

KeyboardInterrupt: 