In [1]:
import argparse
import gym
import numpy as np
from itertools import count
from collections import namedtuple

from DDPG.ddpg import DDPG

# Run options read by main(): 'render' toggles the env.render() call on each
# step, 'log_interval' is the episode modulus used to decide when to print.
args = dict(render=True, log_interval=1)

# Per-episode total rewards, appended to by main().
reward_history = []
# Upper bound on the number of training episodes main() iterates over.
episodes = 10000
# Single shared environment instance used by main() and the prints below.
env = gym.make('MountainCarContinuous-v0')


def main():
    """Run the DDPG training loop on the module-level ``env``.

    Iterates over at most ``episodes`` episodes. In each episode the agent is
    asked for an action via ``agent.act(state, i_episode)``, the action is
    applied with ``env.step``, and the resulting transition is passed to
    ``agent.step``. The summed reward of every episode is appended to the
    module-level ``reward_history``.

    A progress line is printed whenever ``i_episode`` is a multiple of
    ``args['log_interval']``. Training stops early once the mean of the last
    100 entries of ``reward_history`` exceeds ``env.spec.reward_threshold``
    and ``i_episode`` is greater than 100.

    Returns:
        None. Results are reported through ``print`` and ``reward_history``.
    """
    task = {
        'state_size': 2,
        'action_size': 1,
        'action_high': 1,
        # NOTE(review): the lower bound is 0 while the upper bound is 1; if
        # the environment's action space is symmetric around zero this limits
        # the agent to one direction -- confirm against env.action_space.low.
        'action_low': 0
    }
    agent = DDPG(task)
    for i_episode in range(episodes):
        running_reward = 0
        state = env.reset()
        for t in range(10000):  # Don't infinite loop while learning
            action, noise_coeff = agent.act(state, i_episode)
            # `state` is overwritten with the post-step observation, so
            # agent.step receives the state reached after taking `action`.
            state, reward, done, _ = env.step(action)
            agent.step(action, reward, state, done)
            if args['render']:
                env.render()
            running_reward += reward
            if done:
                break

        reward_history.append(running_reward)

        # Compute the rolling average on every episode, not only on logging
        # episodes, so the solved check below never compares a stale value
        # when args['log_interval'] is greater than 1.
        avg_reward = np.mean(reward_history[-100:])

        if i_episode % args['log_interval'] == 0:
            print('Episode {}   Last length: {:5d}   Reward: {:7.2f}   Avg Reward: {:7.2f}   Noise: {:.2f}'.format(
                i_episode, t, running_reward, avg_reward, noise_coeff))
        if avg_reward > env.spec.reward_threshold and i_episode > 100:
            print("Solved! Average 100-episode reward is now {}!".format(avg_reward))
            break

# Report the environment's spaces and its solve threshold before training.
print("action_space={}".format(env.action_space))
print("obs_space={}".format(env.observation_space))
print("threshold={} \n".format(env.spec.reward_threshold))
try:
    main()
finally:
    # Release the environment (and any window opened by env.render()) even
    # when training is interrupted from the notebook or raises.
    env.close()



Using TensorFlow backend.


WARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.
WARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.
action_space=Box(1,)
obs_space=Box(2,)
threshold=90.0 

Episode 0   Last length:   998   Reward:  -25.23   Avg Reward:  -25.23   Noise: 1.00
Episode 1   Last length:   477   Reward:   89.94   Avg Reward:   32.35   Noise: 1.00
Episode 2   Last length:   666   Reward:   85.97   Avg Reward:   50.23   Noise: 1.00
Episode 3   Last length:   925   Reward:   15.62   Avg Reward:   41.57   Noise: 1.00
Episode 4   Last length:   503   Reward:   41.59   Avg Reward:   41.58   Noise: 1.00
Episode 5   Last length:   998   Reward: -121.60   Avg Reward:   14.38   Noise: 0.99
Episode 6   Last length:   998   Reward: -122.24   Avg Reward:   -5.14   Noise: 0.99
Episode 7   Last length:   998   Reward:  -78.01   Avg Reward:  -14.25   Noise: 0.99
Episode 8   Last length:   983   Reward:   79.11

Episode 93   Last length:   539   Reward:   62.60   Avg Reward:   22.02   Noise: 0.91
Episode 94   Last length:   257   Reward:   81.13   Avg Reward:   22.64   Noise: 0.91
Episode 95   Last length:   377   Reward:   70.47   Avg Reward:   23.14   Noise: 0.91
Episode 96   Last length:   229   Reward:   82.84   Avg Reward:   23.75   Noise: 0.90
Episode 97   Last length:   260   Reward:   75.71   Avg Reward:   24.28   Noise: 0.90
Episode 98   Last length:   524   Reward:   41.22   Avg Reward:   24.45   Noise: 0.90
Episode 99   Last length:   188   Reward:   84.28   Avg Reward:   25.05   Noise: 0.90
Episode 100   Last length:   179   Reward:   80.07   Avg Reward:   26.10   Noise: 0.90
Episode 101   Last length:   142   Reward:   88.72   Avg Reward:   26.09   Noise: 0.90
Episode 102   Last length:   332   Reward:   69.33   Avg Reward:   25.93   Noise: 0.90
Episode 103   Last length:   242   Reward:   85.78   Avg Reward:   26.63   Noise: 0.90
Episode 104   Last length:   234   Reward:   82.12

Episode 188   Last length:   114   Reward:   88.02   Avg Reward:   85.44   Noise: 0.81
Episode 189   Last length:   141   Reward:   87.58   Avg Reward:   85.55   Noise: 0.81
Episode 190   Last length:   131   Reward:   88.09   Avg Reward:   85.77   Noise: 0.81
Episode 191   Last length:   218   Reward:   88.10   Avg Reward:   86.00   Noise: 0.81
Episode 192   Last length:   131   Reward:   92.13   Avg Reward:   86.17   Noise: 0.81
Episode 193   Last length:   121   Reward:   89.95   Avg Reward:   86.45   Noise: 0.81
Episode 194   Last length:   129   Reward:   89.14   Avg Reward:   86.53   Noise: 0.81
Episode 195   Last length:   117   Reward:   89.25   Avg Reward:   86.71   Noise: 0.81
Episode 196   Last length:   126   Reward:   92.31   Avg Reward:   86.81   Noise: 0.80
Episode 197   Last length:   132   Reward:   87.13   Avg Reward:   86.92   Noise: 0.80
Episode 198   Last length:   146   Reward:   86.83   Avg Reward:   87.38   Noise: 0.80
Episode 199   Last length:   127   Reward: 

Episode 283   Last length:   128   Reward:   91.99   Avg Reward:   89.73   Noise: 0.72
Episode 284   Last length:   111   Reward:   90.61   Avg Reward:   89.81   Noise: 0.72
Episode 285   Last length:   116   Reward:   89.83   Avg Reward:   89.89   Noise: 0.71
Episode 286   Last length:   122   Reward:   87.79   Avg Reward:   89.88   Noise: 0.71
Episode 287   Last length:   113   Reward:   88.44   Avg Reward:   89.86   Noise: 0.71
Episode 288   Last length:   114   Reward:   91.31   Avg Reward:   89.90   Noise: 0.71
Episode 289   Last length:   120   Reward:   93.49   Avg Reward:   89.96   Noise: 0.71
Episode 290   Last length:   111   Reward:   88.39   Avg Reward:   89.96   Noise: 0.71
Episode 291   Last length:   192   Reward:   87.87   Avg Reward:   89.96   Noise: 0.71
Episode 292   Last length:   122   Reward:   89.36   Avg Reward:   89.93   Noise: 0.71
Episode 293   Last length:   126   Reward:   91.45   Avg Reward:   89.94   Noise: 0.71
Episode 294   Last length:   108   Reward: 