In [1]:
import argparse
import gym
import numpy as np
from itertools import count
from collections import namedtuple

from DDPG.ddpg import DDPG

# Run-time switches read by main(): whether to draw every environment step
# and how often (in episodes) a progress line is printed.
args = dict(render=True, log_interval=1)

# Upper bound on the number of training episodes.
episodes = 10000

# Total reward of each finished episode, appended to by main().
reward_history = []

# Environment shared by main() and the diagnostic prints below.
env = gym.make('MountainCarContinuous-v0')


def main():
    """Train a DDPG agent on the module-level ``env``.

    Runs at most ``episodes`` episodes of at most 10000 steps each. After
    every episode the total reward is appended to the module-level
    ``reward_history`` and the mean of its last 100 entries is computed.
    A progress line is printed every ``args['log_interval']`` episodes.
    Training stops early once that mean exceeds
    ``env.spec.reward_threshold`` and more than 100 episodes have run.

    Reads the module-level names ``env``, ``episodes``, ``args`` and
    ``reward_history``; returns ``None``.
    """
    # NOTE(review): action_low is 0 here, while gym documents the action
    # range of MountainCarContinuous-v0 as [-1, 1] -- confirm that limiting
    # the agent to non-negative actions is intentional.
    task = {
        'state_size': 2,
        'action_size': 1,
        'action_high': 1,
        'action_low': 0
    }
    agent = DDPG(task)
    for i_episode in range(episodes):
        running_reward = 0
        state = env.reset()
        for t in range(10000):  # Don't infinite loop while learning
            action, noise_coeff = agent.act(state, i_episode)
            # `state` is overwritten with the post-step observation, so
            # agent.step() below receives the *next* state.
            state, reward, done, _ = env.step(action)
            agent.step(action, reward, state, done)
            if args['render']:
                env.render()
            running_reward += reward
            if done:
                break

        reward_history.append(running_reward)

        # Computed every episode (not only on logging episodes) so the
        # solved check below never compares a stale average when
        # log_interval > 1.
        avg_reward = np.mean(reward_history[-100:])

        if i_episode % args['log_interval'] == 0:
            # `t` is the zero-based index of the last step taken.
            print('Episode {}   Last length: {:5d}   Reward: {:7.2f}   Avg Reward: {:7.2f}   Noise: {:.2f}'.format(
                i_episode, t, running_reward, avg_reward, noise_coeff))
        if avg_reward > env.spec.reward_threshold and i_episode > 100:
            print("Solved! Average 100-episode reward is now {}!".format(avg_reward))
            break

# Show the environment's spaces and its solved threshold, then start training.
print(
    "action_space={}".format(env.action_space),
    "obs_space={}".format(env.observation_space),
    "threshold={} \n".format(env.spec.reward_threshold),
    sep="\n",
)
main()



Using TensorFlow backend.


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
action_space=Box(1,)
obs_space=Box(2,)
threshold=90.0 

Episode 0   Last length:   998   Reward:  -29.63   Avg Reward:  -29.63   Noise: 1.00
Episode 1   Last length:   998   Reward:  -19.97   Avg Reward:  -24.80   Noise: 1.00
Episode 2   Last length:   998   Reward:  -20.58   Avg Reward:  -23.40   Noise: 1.00
Episode 3   Last length:   998   Reward:  -19.77   Avg Reward:  -22.49   Noise: 1.00
Episode 4   Last length:   998   Reward:  -19.97   Avg Reward:  -21.98   Noise: 1.00
Episode 5   Last length:   998   Reward:  -18.84   Avg Reward:  -21.46   Noise: 0.99
Episode 6   Last length:   998   Reward:  -20.11   Avg Reward:  -21.27   Noise: 0.99
Episode 7   Last length:   998   Reward:  -18.93   Avg Reward:  -20.97   Noise: 0.99
Episode 8   Last length:   730   Reward:   85.55

Episode 93   Last length:   629   Reward:   32.16   Avg Reward:   40.21   Noise: 0.91
Episode 94   Last length:   674   Reward:   28.36   Avg Reward:   40.09   Noise: 0.91
Episode 95   Last length:   231   Reward:   78.82   Avg Reward:   40.49   Noise: 0.91
Episode 96   Last length:   307   Reward:   74.58   Avg Reward:   40.84   Noise: 0.90
Episode 97   Last length:   319   Reward:   79.46   Avg Reward:   41.24   Noise: 0.90
Episode 98   Last length:   139   Reward:   85.94   Avg Reward:   41.69   Noise: 0.90
Episode 99   Last length:   224   Reward:   73.60   Avg Reward:   42.01   Noise: 0.90
Episode 100   Last length:   243   Reward:   81.99   Avg Reward:   43.12   Noise: 0.90
Episode 101   Last length:   197   Reward:   87.74   Avg Reward:   44.20   Noise: 0.90
Episode 102   Last length:   262   Reward:   72.47   Avg Reward:   45.13   Noise: 0.90
Episode 103   Last length:   802   Reward:   13.11   Avg Reward:   45.46   Noise: 0.90
Episode 104   Last length:   387   Reward:   63.21

Episode 188   Last length:   187   Reward:   83.11   Avg Reward:   81.00   Noise: 0.81
Episode 189   Last length:   127   Reward:   87.51   Avg Reward:   81.23   Noise: 0.81
Episode 190   Last length:   122   Reward:   90.72   Avg Reward:   81.87   Noise: 0.81
Episode 191   Last length:   135   Reward:   90.75   Avg Reward:   82.16   Noise: 0.81
Episode 192   Last length:   129   Reward:   89.66   Avg Reward:   82.64   Noise: 0.81
Episode 193   Last length:   177   Reward:   87.76   Avg Reward:   83.20   Noise: 0.81
Episode 194   Last length:   119   Reward:   89.58   Avg Reward:   83.81   Noise: 0.81
Episode 195   Last length:   216   Reward:   84.72   Avg Reward:   83.87   Noise: 0.81
Episode 196   Last length:   128   Reward:   88.29   Avg Reward:   84.00   Noise: 0.80
Episode 197   Last length:   382   Reward:   63.10   Avg Reward:   83.84   Noise: 0.80
Episode 198   Last length:   330   Reward:   74.75   Avg Reward:   83.73   Noise: 0.80
Episode 199   Last length:   146   Reward: 

Episode 283   Last length:   137   Reward:   87.05   Avg Reward:   88.29   Noise: 0.72
Episode 284   Last length:   119   Reward:   88.68   Avg Reward:   88.27   Noise: 0.72
Episode 285   Last length:   132   Reward:   87.71   Avg Reward:   88.26   Noise: 0.71
Episode 286   Last length:   258   Reward:   88.79   Avg Reward:   88.24   Noise: 0.71
Episode 287   Last length:   275   Reward:   92.33   Avg Reward:   88.28   Noise: 0.71
Episode 288   Last length:   387   Reward:   83.85   Avg Reward:   88.28   Noise: 0.71
Episode 289   Last length:   273   Reward:   91.97   Avg Reward:   88.33   Noise: 0.71
Episode 290   Last length:   154   Reward:   89.95   Avg Reward:   88.32   Noise: 0.71
Episode 291   Last length:   136   Reward:   86.99   Avg Reward:   88.28   Noise: 0.71
Episode 292   Last length:   124   Reward:   90.71   Avg Reward:   88.29   Noise: 0.71
Episode 293   Last length:   118   Reward:   88.86   Avg Reward:   88.31   Noise: 0.71
Episode 294   Last length:   121   Reward: 