In [None]:
import argparse
import gym
import numpy as np
from itertools import count
from collections import namedtuple

from DDPG.ddpg import DDPG

# Run-time settings. main() reads 'render' and 'log_interval';
# 'gamma' is not read by the code visible in this cell.
args = dict(
    gamma=0.99,
    render=True,
    log_interval=1,
)

# Per-episode total rewards, appended to by main().
reward_history = []
# main() stops once the recent average reward rises above this value.
threshold = -100
# Upper bound on the number of training episodes.
episodes = 100000
# Environment shared by main() and the diagnostic prints below it.
env = gym.make('Acrobot-v1')


def main():
    """Train a DDPG agent on the module-level ``env`` until it is "solved".

    Runs up to ``episodes`` episodes of at most 10000 steps each. Every
    step asks the agent for an action, applies it to the environment and
    feeds the outcome back through ``agent.step``. The total reward of
    each episode is appended to the module-level ``reward_history``.

    Training stops early once more than 100 episodes have run and the
    mean reward over the last 100 episodes exceeds ``threshold``.

    Reads the module-level names ``env``, ``episodes``, ``threshold``,
    ``reward_history`` and ``args`` (keys ``'render'`` and
    ``'log_interval'``). Returns ``None``.
    """
    # Sizes are hard-coded for Acrobot-v1 (Box(6,) observations,
    # Discrete(3) actions). NOTE(review): the meaning of
    # action_high/action_low is defined by DDPG, not visible here.
    task = {
        'state_size': 6,
        'action_size': 3,
        'action_high': 1,
        'action_low': 0
    }
    agent = DDPG(task)
    for i_episode in range(episodes):
        running_reward = 0
        state = env.reset()
        for t in range(10000):  # Don't infinite loop while learning
            action, probs, noise_coeff = agent.act(state, i_episode)
            # `state` is rebound to the post-step observation, so that is
            # what agent.step receives below.
            state, reward, done, _ = env.step(action)
            agent.step(probs, reward, state, done)
            if args['render']:
                env.render()
            running_reward += reward
            if done:
                break

        reward_history.append(running_reward)
        # Compute the rolling average on every episode, not only on logged
        # ones, so the stopping check below never reads a stale value when
        # log_interval > 1.
        avg_reward = np.mean(reward_history[-100:])
        # `t` is the zero-based index of the last step taken, so the
        # episode length is t + 1.
        episode_length = t + 1

        if i_episode % args['log_interval'] == 0:
            print('Episode {}\tLast length: {:5d}\t Reward: {:7.2f}\t Avg Reward: {:7.2f}\t Noise: {:.2f}'.format(
                i_episode, episode_length, running_reward, avg_reward, noise_coeff))
        if avg_reward > threshold and i_episode > 100:
            print("Solved! Running reward is now {} and "
                  "the last episode runs to {} time steps!".format(running_reward, episode_length))
            break

# Report the environment's action space, observation space and registered
# reward threshold, then start training.
for template, value in (
    ("action_space={}", env.action_space),
    ("obs_space={}", env.observation_space),
    ("threshold={} \n", env.spec.reward_threshold),
):
    print(template.format(value))
main()



Using TensorFlow backend.


WARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.
action_space=Discrete(3)
obs_space=Box(6,)
threshold=None 

Episode 0	Last length:   130	 Reward: -130.00	 Avg Reward: -130.00	 Noise: 1.00
Episode 1	Last length:   108	 Reward: -108.00	 Avg Reward: -119.00	 Noise: 1.00
Episode 2	Last length:   112	 Reward: -112.00	 Avg Reward: -116.67	 Noise: 1.00
Episode 3	Last length:   147	 Reward: -147.00	 Avg Reward: -124.25	 Noise: 1.00
Episode 4	Last length:   131	 Reward: -131.00	 Avg Reward: -125.60	 Noise: 1.00
Episode 5	Last length:   134	 Reward: -134.00	 Avg Reward: -127.00	 Noise: 1.00
Episode 6	Last length:   141	 Reward: -141.00	 Avg Reward: -129.00	 Noise: 1.00
Episode 7	Last length:   148	 Reward: -148.00	 Avg Reward: -131.38	 Noise: 1.00
Episode 8	Last length:   121	 Reward: -121.00	 Avg Reward: -130.22	 Noise: 1.00
Episode 9	Last length:   119	 Reward: -119.00	 Avg Reward: -129.10	 Noise: 1.00
Episode 10	Last length:   105	 R

Episode 100	Last length:   499	 Reward: -500.00	 Avg Reward: -440.11	 Noise: 0.97
Episode 101	Last length:   499	 Reward: -500.00	 Avg Reward: -444.03	 Noise: 0.97
Episode 102	Last length:   499	 Reward: -500.00	 Avg Reward: -447.91	 Noise: 0.97
Episode 103	Last length:   499	 Reward: -500.00	 Avg Reward: -451.44	 Noise: 0.97
Episode 104	Last length:   499	 Reward: -500.00	 Avg Reward: -455.13	 Noise: 0.97
Episode 105	Last length:   499	 Reward: -500.00	 Avg Reward: -458.79	 Noise: 0.96
Episode 106	Last length:   499	 Reward: -500.00	 Avg Reward: -462.38	 Noise: 0.96
Episode 107	Last length:   499	 Reward: -500.00	 Avg Reward: -465.90	 Noise: 0.96
Episode 108	Last length:   499	 Reward: -500.00	 Avg Reward: -469.69	 Noise: 0.96
Episode 109	Last length:   499	 Reward: -500.00	 Avg Reward: -473.50	 Noise: 0.96
Episode 110	Last length:   499	 Reward: -500.00	 Avg Reward: -477.45	 Noise: 0.96
Episode 111	Last length:   499	 Reward: -500.00	 Avg Reward: -481.31	 Noise: 0.96
Episode 112	Last