In [None]:
import argparse
import gym
import numpy as np
from itertools import count
from collections import namedtuple

from DDPG.ddpg import DDPG

# Run-time switches read by main():
#   'render'       - when true, env.render() is called after every step
#   'log_interval' - a progress line is printed every this-many episodes
args = dict(render=True, log_interval=50)

# Upper bound on the number of training episodes.
episodes = 100000
# Per-episode total reward, appended to by main() after each episode.
reward_history = []
# main() stops once the mean reward of the last 100 episodes exceeds this.
# NOTE(review): the captured run prints env.spec.reward_threshold as 475.0
# for this environment, so 195 is a lower bar than the environment's own
# threshold - confirm this is intentional.
threshold = 195

# Single shared environment instance used by the whole script.
env = gym.make('CartPole-v1')


def main():
    """Train a DDPG agent on the module-level ``env``.

    Runs at most ``episodes`` episodes of at most 10000 steps each. After
    every episode the total reward is appended to ``reward_history`` and the
    mean over the last 100 entries is computed. A progress line is printed
    every ``args['log_interval']`` episodes. Training stops early once that
    mean exceeds ``threshold`` and more than 100 episodes have been indexed.

    Reads the module-level names ``env``, ``episodes``, ``args``,
    ``threshold`` and mutates ``reward_history``. Returns ``None``.
    """
    # Problem description handed to the agent. state_size matches the
    # Box(4,) observation space reported by the environment.
    # NOTE(review): how DDPG interprets action_size/high/low is defined in
    # DDPG.ddpg, which is not visible here - confirm against that module.
    task = {
        'state_size': 4,
        'action_size': 1,
        'action_high': 1,
        'action_low': 0
    }
    agent = DDPG(task)
    for i_episode in range(episodes):
        running_reward = 0
        state = env.reset()
        for t in range(10000):  # Don't infinite loop while learning
            action, noise_coeff = agent.act(state, i_episode)
            state, reward, done, _ = env.step(action)
            # `state` is already the observation returned by env.step here.
            # NOTE(review): presumably DDPG.step expects the next state as
            # its third argument - confirm against DDPG.ddpg.
            agent.step(action, reward, state, done)
            if args['render']:
                env.render()
            running_reward += reward
            if done:
                break

        # `t` is the zero-based index of the last step taken, so the number
        # of steps in the episode is t + 1. Reporting `t` itself under
        # "Last length" was off by one (e.g. length 11 next to reward 12.00).
        episode_length = t + 1

        reward_history.append(running_reward)

        # Mean over (up to) the 100 most recent episodes.
        avg_reward = np.mean(reward_history[-100:])
        if i_episode % args['log_interval'] == 0:
            print('Episode {}   Last length: {:5d}   Reward: {:7.2f}   Avg Reward: {:7.2f}   Noise: {:.2f}'.format(
                i_episode, episode_length, running_reward, avg_reward, noise_coeff))
        # The i_episode guard ensures the averaging window is full before
        # the run can be declared solved.
        if avg_reward > threshold and i_episode > 100:
            print("Solved! Average 100-episode reward is now {}!".format(avg_reward))
            break
            
# Report the environment's action/observation spaces and its own reward
# threshold, then start training.
for template, value in (
    ("action_space={}", env.action_space),
    ("obs_space={}", env.observation_space),
    ("threshold={} \n", env.spec.reward_threshold),
):
    print(template.format(value))
main()



Using TensorFlow backend.


WARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.
action_space=Discrete(2)
obs_space=Box(4,)
threshold=475.0 

Episode 0   Last length:    11   Reward:   12.00   Avg Reward:   12.00   Noise: 1.00
Episode 50   Last length:    19   Reward:   20.00   Avg Reward:   16.18   Noise: 0.99
Episode 100   Last length:    10   Reward:   11.00   Avg Reward:   15.63   Noise: 0.99
Episode 150   Last length:    20   Reward:   21.00   Avg Reward:   13.72   Noise: 0.98
Episode 200   Last length:     8   Reward:    9.00   Avg Reward:   11.57   Noise: 0.98
Episode 250   Last length:     9   Reward:   10.00   Avg Reward:   10.18   Noise: 0.97
Episode 300   Last length:    14   Reward:   15.00   Avg Reward:   10.07   Noise: 0.97
Episode 350   Last length:     8   Reward:    9.00   Avg Reward:   12.38   Noise: 0.96
Episode 400   Last length:    34   Reward:   35.00   Avg Reward:   16.52   Noise: 0.96
Episode 450   Last length:    48   Reward:   49.00 

Episode 4600   Last length:    31   Reward:   32.00   Avg Reward:   21.43   Noise: 0.54
Episode 4650   Last length:    26   Reward:   27.00   Avg Reward:   20.20   Noise: 0.54
Episode 4700   Last length:    10   Reward:   11.00   Avg Reward:   17.54   Noise: 0.53
Episode 4750   Last length:    10   Reward:   11.00   Avg Reward:   16.27   Noise: 0.53
Episode 4800   Last length:    23   Reward:   24.00   Avg Reward:   15.51   Noise: 0.52
Episode 4850   Last length:    21   Reward:   22.00   Avg Reward:   15.65   Noise: 0.52
Episode 4900   Last length:    10   Reward:   11.00   Avg Reward:   15.39   Noise: 0.51
Episode 4950   Last length:    20   Reward:   21.00   Avg Reward:   14.99   Noise: 0.51
Episode 5000   Last length:    17   Reward:   18.00   Avg Reward:   15.51   Noise: 0.50
Episode 5050   Last length:    22   Reward:   23.00   Avg Reward:   18.17   Noise: 0.49
Episode 5100   Last length:    19   Reward:   20.00   Avg Reward:   28.25   Noise: 0.49
Episode 5150   Last length:    5