In [1]:
import argparse
import gym
import numpy as np
from itertools import count
from collections import namedtuple

from DDPG.ddpg import DDPG

args = {
    'gamma': 0.99,
    'render': True,
    'log_interval': 50
}

env = gym.make('Pendulum-v0')
episodes = 100000
print("action_space={}".format(env.action_space))
print("obs_space={}".format(env.observation_space))


def main():   
    task = {
        'state_size': 3,
        'action_size': 1,
        'action_high': 2,
        'action_low': -2
    }
    agent = DDPG(task)
    sum_reward = 0
    threshold = -100
    for i_episode in range(episodes):
        running_reward = 0        
        state = env.reset()
        for t in range(10000):  # Don't infinite loop while learning
            action = agent.act(state, i_episode)                
            state, reward, done, _ = env.step(action)            
            if args['render']:
                env.render()                   
            running_reward += reward            
            if done:
                break
        
        sum_reward += running_reward
        
        if i_episode % args['log_interval'] == 0:
            avg_reward = sum_reward / args['log_interval']
            sum_reward = 0
            print('Episode {}\tLast length: {:5d}\t Reward: {:7.2f}\t Avg Reward: {:7.2f}'.format(
                i_episode, t, running_reward, avg_reward))
        if running_reward > threshold:
            print("Solved! Running reward is now {} and "
                  "the last episode runs to {} time steps!".format(running_reward, t))
            break

print("threshold={} \n".format(env.spec.reward_threshold))
main()



Using TensorFlow backend.


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
action_space=Box(1,)
obs_space=Box(3,)
threshold=None 

Episode 0	Last length:   199	 Reward: -1461.83	 Avg Reward:  -29.24
Episode 50	Last length:   199	 Reward: -1427.82	 Avg Reward: -1314.31
Episode 100	Last length:   199	 Reward: -1407.98	 Avg Reward: -1281.10
Episode 150	Last length:   199	 Reward: -1626.74	 Avg Reward: -1326.54
Episode 200	Last length:   199	 Reward: -988.87	 Avg Reward: -1319.93
Episode 250	Last length:   199	 Reward: -1631.19	 Avg Reward: -1380.47
Episode 300	Last length:   199	 Reward: -1326.55	 Avg Reward: -1312.35
Episode 350	Last length:   199	 Reward: -1229.22	 Avg Reward: -1295.11
Episode 400	Last length:   199	 Reward: -1349.11	 Avg Reward: -1323.63
Episode 450	Last length:   199	 Reward: -1312.77	 Avg Reward: -1368.92
Episode 500	Last length

Episode 5550	Last length:   199	 Reward: -809.06	 Avg Reward: -1310.79
Episode 5600	Last length:   199	 Reward: -1365.94	 Avg Reward: -1343.60
Episode 5650	Last length:   199	 Reward: -1056.14	 Avg Reward: -1337.76
Episode 5700	Last length:   199	 Reward: -1308.51	 Avg Reward: -1320.00
Episode 5750	Last length:   199	 Reward: -1347.44	 Avg Reward: -1315.48
Episode 5800	Last length:   199	 Reward: -1206.88	 Avg Reward: -1380.92
Episode 5850	Last length:   199	 Reward: -1320.09	 Avg Reward: -1290.55
Episode 5900	Last length:   199	 Reward: -1259.00	 Avg Reward: -1322.09
Episode 5950	Last length:   199	 Reward: -1363.31	 Avg Reward: -1343.08
Episode 6000	Last length:   199	 Reward: -1370.33	 Avg Reward: -1328.03
Episode 6050	Last length:   199	 Reward: -1308.91	 Avg Reward: -1295.62
Episode 6100	Last length:   199	 Reward: -1078.90	 Avg Reward: -1349.75
Episode 6150	Last length:   199	 Reward: -1802.69	 Avg Reward: -1338.39
Episode 6200	Last length:   199	 Reward: -1472.90	 Avg Reward: -1

Episode 11250	Last length:   199	 Reward: -1153.53	 Avg Reward: -1310.41
Episode 11300	Last length:   199	 Reward: -1275.42	 Avg Reward: -1321.93
Episode 11350	Last length:   199	 Reward: -1387.05	 Avg Reward: -1338.49
Episode 11400	Last length:   199	 Reward: -1261.16	 Avg Reward: -1327.03
Episode 11450	Last length:   199	 Reward: -900.15	 Avg Reward: -1353.74
Episode 11500	Last length:   199	 Reward: -888.65	 Avg Reward: -1298.84
Episode 11550	Last length:   199	 Reward: -1345.50	 Avg Reward: -1319.01
Episode 11600	Last length:   199	 Reward: -1319.63	 Avg Reward: -1264.19
Episode 11650	Last length:   199	 Reward: -1343.37	 Avg Reward: -1319.00
Episode 11700	Last length:   199	 Reward: -1257.16	 Avg Reward: -1284.11
Episode 11750	Last length:   199	 Reward: -1525.92	 Avg Reward: -1310.65
Episode 11800	Last length:   199	 Reward: -1316.52	 Avg Reward: -1278.38
Episode 11850	Last length:   199	 Reward: -1296.43	 Avg Reward: -1337.29
Episode 11900	Last length:   199	 Reward: -1319.83	 A

Episode 16900	Last length:   199	 Reward: -1398.12	 Avg Reward: -1337.89
Episode 16950	Last length:   199	 Reward: -1368.99	 Avg Reward: -1333.22
Episode 17000	Last length:   199	 Reward: -1363.83	 Avg Reward: -1249.26
Episode 17050	Last length:   199	 Reward: -1398.08	 Avg Reward: -1325.63
Episode 17100	Last length:   199	 Reward: -1306.19	 Avg Reward: -1354.76
Episode 17150	Last length:   199	 Reward: -1286.03	 Avg Reward: -1303.25
Episode 17200	Last length:   199	 Reward: -1826.39	 Avg Reward: -1337.98
Episode 17250	Last length:   199	 Reward: -1170.61	 Avg Reward: -1330.19
Episode 17300	Last length:   199	 Reward: -1341.78	 Avg Reward: -1307.46
Episode 17350	Last length:   199	 Reward: -900.73	 Avg Reward: -1290.63
Episode 17400	Last length:   199	 Reward: -1724.79	 Avg Reward: -1360.06
Episode 17450	Last length:   199	 Reward: -1318.64	 Avg Reward: -1297.84
Episode 17500	Last length:   199	 Reward: -896.71	 Avg Reward: -1298.11
Episode 17550	Last length:   199	 Reward: -1725.57	 A

KeyboardInterrupt: 