In [2]:
import numpy as np
import pandas as pd
import gym
#import logz_pytorch as logz
import logz
import scipy.signal
import os
import time
import inspect

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable

#============================================================================================#
# Utilities
#============================================================================================#


def pathlength(path):
    return len(path["reward"])


class MLP(nn.Module):

    # ========================================================================================#
    #                           ----------SECTION 3----------
    # Network building
    #
    # Your code should make a feedforward neural network (also called a multilayer perceptron)
    # with 'n_layers' hidden layers of size 'size' units.
    #
    # The output layer should have size 'output_size' and activation 'output_activation'.
    #
    # ========================================================================================#

    def __init__(self, input_size, output_size, n_layers=2, size=64, activation=F.tanh, output_activation=None):
        super(MLP, self).__init__()
        self.dropout_rate = 0.3
        self.fc1 = nn.Linear(input_size, size)
        self.fc2 = nn.Linear(size, size)
        self.fc3 = nn.Linear(size, output_size)
#         self.fc = (
#             [nn.Linear(input_size, size)] + 
#             [nn.Linear(size, size)] * n_layers + 
#             [nn.Linear(size, output_size)]
#         )
        self.drop = nn.Dropout(p = self.dropout_rate)
        self.activation = activation
        self.output_activation = output_activation

    # assert output_activation is None, 'output activation must be None, other options not implemented'
 
    def forward(self, x):
        x = self.activation(self.fc1(x))  
#        x = self.activation(self.fc2(x))  
        if self.output_activation is None:
            x = self.fc3(x)                      # Last layer 
        else:
            x = self.output_activation(self.fc3(x))  
        return x.view(x.size(0), -1)

    def set_dropout_rate(self, p):
        self.dropout_rate = p
        

#============================================================================================#
# Policy Gradient
#============================================================================================#

def train_PG(exp_name='',
             env_name='CartPole-v0',
             n_iter=100,
             gamma=1.0,
             min_timesteps_per_batch=1000,
             max_path_length=None,
             learning_rate=5e-3,
             reward_to_go=True,
             animate=True,
             logdir=None,
             normalize_advantages=True,
             nn_baseline=False,
             seed=0,
             # network arguments
             n_layers=1,
             size=32
             ):

    start = time.time()

    # Configure output directory for logging
    logz.configure_output_dir(logdir)
    logz.G.first_row = True
    # Log experimental parameters
    #args = inspect.getfullargspec(train_PG)[0]
    args = inspect.getargspec(train_PG)[0]
    locals_ = locals()
    params = {k: locals_[k] if k in locals_ else None for k in args}
    logz.save_params(params)

    # Set random seeds
    torch.manual_seed(seed)
    np.random.seed(seed)

    # Make the gym environment
    env = gym.make(env_name)

    # Is this env continuous, or discrete?
    discrete = isinstance(env.action_space, gym.spaces.Discrete)

    # Maximum length for episodes
    max_path_length = max_path_length or env.spec.max_episode_steps

    #========================================================================================#
    # Notes on notation:
    #
    # Symbolic variables have the prefix sy_, to distinguish them from the numerical values
    # that are computed later in the function
    #
    # Prefixes and suffixes:
    # ob - observation
    # ac - action
    # _no - this tensor should have shape (batch size /n/, observation dim)
    # _na - this tensor should have shape (batch size /n/, action dim)
    # _n  - this tensor should have shape (batch size /n/)
    #
    # Note: batch size /n/ is defined at runtime, and until then, the shape for that axis
    # is None
    #========================================================================================#

    # Observation and action sizes
    ob_dim = env.observation_space.shape[0]
    ac_dim = env.action_space.n if discrete else env.action_space.shape[0]

    mlp = MLP(input_size = ob_dim, output_size = ac_dim, 
              n_layers = n_layers, size = size,output_activation=None)
#     loss = TODO # Loss function that we'll differentiate to get the policy gradient.
    update_op = optim.Adam(mlp.parameters(), lr=learning_rate)


    #========================================================================================#
    #                           ----------SECTION 5----------
    # Optional Baseline
    #========================================================================================#

    if nn_baseline:
        mlp_baseline = MLP(ob_dim, 1, n_layers=n_layers, size=size,output_activation=None)
        update_baseline_op = optim.Adam(mlp_baseline.parameters(), lr=learning_rate)
        q_prev_mean, q_prev_std = 0., 1.


    #========================================================================================#
    # Training Loop
    #========================================================================================#

    total_timesteps = 0

    for itr in range(n_iter):
        print("********** Iteration %i ************"%itr)

        # Collect paths until we have enough timesteps
        timesteps_this_batch = 0
        paths = []
        while True:
            ob = env.reset()
            obs, acs, rewards = [], [], []
            animate_this_episode=(len(paths)==0 and (itr % 10 == 0) and animate)
            steps = 0
            while True:
                if animate_this_episode:
                    env.render()
                    time.sleep(0.05)
                obs.append(ob)

                #TODO: CHECK_IT!
#                 ac_logits = mlp(Variable(torch.Tensor(ob[None]))).data.numpy()


                mu = mlp(Variable(torch.Tensor(ob[None]))).data.numpy()[0][0]
                std = 0.01
        
        
                ac_raw = np.random.randn()
                ac = ac_raw * std + mu 
                # Pick action according to mlp policy. mlp output is actions logits

#                 ac_probs = 1. / (1 + np.exp( -ac_logits))
#                 ac_probs = ac_probs / ac_probs.sum()

#                 ac = np.random.choice(range(ac_dim), p = ac_logits)
                
                #ac = ac[0]
                acs.append(ac)
                #1/0
                ob, rew, done, _ = env.step([ac])
                rewards.append(rew)
                steps += 1
                if done or steps > max_path_length:
                    break
            path = {"observation" : np.array(obs),
                    "reward" : np.array(rewards),
                    "action" : np.array(acs)}
            paths.append(path)
            timesteps_this_batch += pathlength(path)
            if timesteps_this_batch > min_timesteps_per_batch:
                break
        total_timesteps += timesteps_this_batch

        # Build arrays for observation, action for the policy gradient update by concatenating
        # across paths
        ob_no = np.concatenate([path["observation"] for path in paths])
        ac_na = np.concatenate([path["action"] for path in paths])

 
        # YOUR_CODE_HERE
        if reward_to_go:
            q_n = np.concatenate([
                                    np.cumsum([r * gamma ** i for i,r in enumerate(path["reward"])][::-1])[::-1]                
                                 for path in paths])
        else:            
            q_n = np.concatenate([
                                    [sum([r * gamma ** i for i,r in enumerate(path["reward"])])] 
                                        * len(path["reward"]) 
                                 for path in paths])
            
        #====================================================================================#
        #                           ----------SECTION 5----------
        # Computing Baselines
        #====================================================================================#

        if nn_baseline:
            # If nn_baseline is True, use your neural network to predict reward-to-go
            # at each timestep for each trajectory, and save the result in a variable 'b_n'
            # like 'ob_no', 'ac_na', and 'q_n'.
            #
            # Hint #bl1: rescale the output from the nn_baseline to match the statistics
            # (mean and std) of the current or previous batch of Q-values. (Goes with Hint
            # #bl2 below.)
            v_n = mlp_baseline(Variable(torch.Tensor(ob_no)))[:,0]
            b_n = v_n.data.numpy() * q_prev_std + q_prev_mean
            adv_n = q_n - b_n
            q_prev_mean, q_prev_std = q_n.mean(),q_n.std()
            
        else:
            adv_n = q_n.copy()

        #====================================================================================#
        #                           ----------SECTION 4----------
        # Advantage Normalization
        #====================================================================================#

        if normalize_advantages:
            # On the next line, implement a trick which is known empirically to reduce variance
            # in policy gradient methods: normalize adv_n to have mean zero and std=1.
            # YOUR_CODE_HERE
            adv_n = (adv_n - adv_n.mean())/(adv_n.std() + 1e-15)


        #====================================================================================#
        #                           ----------SECTION 5----------
        # Optimizing Neural Network Baseline
        #====================================================================================#

        if nn_baseline:
            # ----------SECTION 5----------
            # If a neural network baseline is used, set up the targets and the inputs for the
            # baseline.
            #
            # Fit it to the current batch in order to use for the next iteration. Use the
            # baseline_update_op you defined earlier.
            #
            # Hint #bl2: Instead of trying to target raw Q-values directly, rescale the
            # targets to have mean zero and std=1. (Goes with Hint #bl1 above.)

            baseline_loss = ((v_n - Variable(torch.Tensor((q_n-q_n.mean())/q_n.std())))**2).mean()
            update_baseline_op.zero_grad()
            baseline_loss.backward()
            update_baseline_op.step()
            

        #====================================================================================#
        #                           ----------SECTION 4----------
        # Performing the Policy Update
        #====================================================================================#

        # Call the update operation necessary to perform the policy gradient update based on
        # the current batch of rollouts.
        #
        # For debug purposes, you may wish to save the value of the loss function before
        # and after an update, and then log them below.

#         PG = E_{tau} [sum_{t=0}^T grad log pi(a_t|s_t) * (Q_t - b_t )]
        # YOUR_CODE_HERE

        #Another solution
#         a,ac,a.view(-1)[torch.LongTensor(ac+3*np.arange(20))]
        adv_var = torch.autograd.Variable(torch.FloatTensor(adv_n) , requires_grad=False)
    
#         actions_t = torch.LongTensor(ac_na+ac_dim*np.arange(ob_no.shape[0]))         
        PGI = (((((mlp(Variable(torch.Tensor(ob_no)))[:,0] - Variable(torch.Tensor(ac_na)))/std)**2) 
                * adv_var).sum() 
               / len(paths)
              )
        print('avg action {}. abs {}'.format(ac_na.mean(),np.abs(ac_na).mean()))
        update_op.zero_grad()
        PGI.backward()
        update_op.step()

        
        
        # Log diagnostics
        returns = [path["reward"].sum() for path in paths]
        ep_lengths = [pathlength(path) for path in paths]
        logz.log_tabular("Time", time.time() - start)
        logz.log_tabular("Iteration", itr)
        logz.log_tabular("AverageReturn", np.mean(returns))
        logz.log_tabular("StdReturn", np.std(returns))
        logz.log_tabular("MaxReturn", np.max(returns))
        logz.log_tabular("MinReturn", np.min(returns))
        logz.log_tabular("EpLenMean", np.mean(ep_lengths))
        logz.log_tabular("EpLenStd", np.std(ep_lengths))
        logz.log_tabular("TimestepsThisBatch", timesteps_this_batch)
        logz.log_tabular("TimestepsSoFar", total_timesteps)
        logz.dump_tabular()


def main():
    import argparse
#     parser = argparse.ArgumentParser()
#     parser.add_argument('env_name', type=str)
#     parser.add_argument('--exp_name', type=str, default='vpg')
#     parser.add_argument('--render', action='store_true')
#     parser.add_argument('--discount', type=float, default=1.0)
#     parser.add_argument('--n_iter', '-n', type=int, default=100)
#     parser.add_argument('--batch_size', '-b', type=int, default=1000)
#     parser.add_argument('--ep_len', '-ep', type=float, default=-1.)
#     parser.add_argument('--learning_rate', '-lr', type=float, default=5e-3)
#     parser.add_argument('--reward_to_go', '-rtg', action='store_true')
#     parser.add_argument('--dont_normalize_advantages', '-dna', action='store_true')
#     parser.add_argument('--nn_baseline', '-bl', action='store_true')
#     parser.add_argument('--seed', type=int, default=1)
#     parser.add_argument('--n_experiments', '-e', type=int, default=1)
#     parser.add_argument('--n_layers', '-l', type=int, default=1)
#     parser.add_argument('--size', '-s', type=int, default=32)
#     args = parser.parse_args()

    args = pd.Series()
    args.batch_size=1000
    args.discount=0.99
    args.dont_normalize_advantages=True
    args.env_name='Pendulum-v0'
    args.ep_len=-1.0
    args.exp_name='lr_1e-2_rtg'
    args.learning_rate=1e-3
    args.n_experiments=4
    args.n_iter=100
    args.n_layers=1
    args.nn_baseline=False
    args.render=False
    args.reward_to_go=True
    args.seed=1
    args.size_=64

                
    if not(os.path.exists('data')):
        os.makedirs('data')
    logdir = args.exp_name + '_' + args.env_name + '_' + time.strftime("%d-%m-%Y_%H-%M-%S")
    logdir = os.path.join('data', logdir)
    if not(os.path.exists(logdir)):
        os.makedirs(logdir)

    max_path_length = args.ep_len if args.ep_len > 0 else None

    for e in range(args.n_experiments):
        seed = args.seed + 10*e
        print('Running experiment with seed %d'%seed)
        def train_func():
            train_PG(
                exp_name=args.exp_name,
                env_name=args.env_name,
                n_iter=args.n_iter,
                gamma=args.discount,
                min_timesteps_per_batch=args.batch_size,
                max_path_length=max_path_length,
                learning_rate=args.learning_rate,
                reward_to_go=args.reward_to_go,
                animate=args.render,
                logdir=os.path.join(logdir,'%d'%seed),
                normalize_advantages=not(args.dont_normalize_advantages),
                nn_baseline=args.nn_baseline,
                seed=seed,
                n_layers=args.n_layers,
                size=args.size_
                )
        train_func()


if __name__ == "__main__":
    main()

[2017-12-12 00:26:25,107] Making new env: Pendulum-v0


Running experiment with seed 1
[32;1mLogging data to data/lr_1e-2_rtg_Pendulum-v0_12-12-2017_00-26-25/1/log.txt[0m
********** Iteration 0 ************
avg action 0.08570540503243124. abs 0.30817467725076764
----------------------------------------
|               Time |           0.594 |
|          Iteration |               0 |
|      AverageReturn |       -1.41e+03 |
|          StdReturn |             258 |
|          MaxReturn |       -1.17e+03 |
|          MinReturn |       -1.91e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         1.2e+03 |
|               Time |           0.594 |
|          Iteration |               0 |
|      AverageReturn |       -1.41e+03 |
|          StdReturn |             258 |
|          MaxReturn |       -1.17e+03 |
|          MinReturn |       -1.91e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsT

avg action -0.030543476301173135. abs 0.4082361320037063
----------------------------------------
|               Time |             2.4 |
|          Iteration |               4 |
|      AverageReturn |       -1.14e+03 |
|          StdReturn |            46.3 |
|          MaxReturn |       -1.05e+03 |
|          MinReturn |        -1.2e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |           6e+03 |
|               Time |             2.4 |
|          Iteration |               4 |
|      AverageReturn |       -1.14e+03 |
|          StdReturn |            46.3 |
|          MaxReturn |       -1.05e+03 |
|          MinReturn |        -1.2e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |           6e+03 |
|               Time |             2.4 |
|          Iteration |               4 |


avg action 0.19901230479329202. abs 0.486448134014553
----------------------------------------
|               Time |            4.14 |
|          Iteration |               8 |
|      AverageReturn |       -1.16e+03 |
|          StdReturn |            45.2 |
|          MaxReturn |       -1.08e+03 |
|          MinReturn |       -1.21e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.08e+04 |
|               Time |            4.14 |
|          Iteration |               8 |
|      AverageReturn |       -1.16e+03 |
|          StdReturn |            45.2 |
|          MaxReturn |       -1.08e+03 |
|          MinReturn |       -1.21e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.08e+04 |
|               Time |            4.14 |
|          Iteration |               8 |
|  

avg action -0.0028148397021520682. abs 0.5295391851388493
----------------------------------------
|               Time |            5.94 |
|          Iteration |              12 |
|      AverageReturn |       -1.19e+03 |
|          StdReturn |            42.5 |
|          MaxReturn |       -1.15e+03 |
|          MinReturn |       -1.27e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.56e+04 |
|               Time |            5.94 |
|          Iteration |              12 |
|      AverageReturn |       -1.19e+03 |
|          StdReturn |            42.5 |
|          MaxReturn |       -1.15e+03 |
|          MinReturn |       -1.27e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.56e+04 |
|               Time |            5.94 |
|          Iteration |              12 |

avg action 0.2607835544005706. abs 0.6010891214701876
----------------------------------------
|               Time |            7.67 |
|          Iteration |              16 |
|      AverageReturn |       -1.29e+03 |
|          StdReturn |            59.1 |
|          MaxReturn |       -1.17e+03 |
|          MinReturn |       -1.35e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        2.04e+04 |
|               Time |            7.67 |
|          Iteration |              16 |
|      AverageReturn |       -1.29e+03 |
|          StdReturn |            59.1 |
|          MaxReturn |       -1.17e+03 |
|          MinReturn |       -1.35e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        2.04e+04 |
|               Time |            7.67 |
|          Iteration |              16 |
|  

avg action 0.12797993201006122. abs 0.6115002331860884
----------------------------------------
|               Time |            9.38 |
|          Iteration |              20 |
|      AverageReturn |       -1.34e+03 |
|          StdReturn |             101 |
|          MaxReturn |        -1.2e+03 |
|          MinReturn |       -1.53e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        2.52e+04 |
|               Time |            9.38 |
|          Iteration |              20 |
|      AverageReturn |       -1.34e+03 |
|          StdReturn |             101 |
|          MaxReturn |        -1.2e+03 |
|          MinReturn |       -1.53e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        2.52e+04 |
|               Time |            9.38 |
|          Iteration |              20 |
| 

avg action -0.46603083173959353. abs 0.7178277496867819
----------------------------------------
|               Time |            11.1 |
|          Iteration |              24 |
|      AverageReturn |       -1.29e+03 |
|          StdReturn |            28.5 |
|          MaxReturn |       -1.24e+03 |
|          MinReturn |       -1.33e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |           3e+04 |
|               Time |            11.1 |
|          Iteration |              24 |
|      AverageReturn |       -1.29e+03 |
|          StdReturn |            28.5 |
|          MaxReturn |       -1.24e+03 |
|          MinReturn |       -1.33e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |           3e+04 |
|               Time |            11.1 |
|          Iteration |              24 |
|

avg action -0.23453453268884222. abs 0.8286778058748
----------------------------------------
|               Time |            12.8 |
|          Iteration |              28 |
|      AverageReturn |       -1.31e+03 |
|          StdReturn |            45.9 |
|          MaxReturn |       -1.22e+03 |
|          MinReturn |       -1.35e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        3.48e+04 |
|               Time |            12.8 |
|          Iteration |              28 |
|      AverageReturn |       -1.31e+03 |
|          StdReturn |            45.9 |
|          MaxReturn |       -1.22e+03 |
|          MinReturn |       -1.35e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        3.48e+04 |
|               Time |            12.8 |
|          Iteration |              28 |
|   

avg action -0.3047871571579272. abs 0.7658453011055535
----------------------------------------
|               Time |            14.6 |
|          Iteration |              32 |
|      AverageReturn |       -1.33e+03 |
|          StdReturn |            30.1 |
|          MaxReturn |       -1.28e+03 |
|          MinReturn |       -1.37e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        3.96e+04 |
|               Time |            14.6 |
|          Iteration |              32 |
|      AverageReturn |       -1.33e+03 |
|          StdReturn |            30.1 |
|          MaxReturn |       -1.28e+03 |
|          MinReturn |       -1.37e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        3.96e+04 |
|               Time |            14.6 |
|          Iteration |              32 |
| 

avg action -0.018063386864663494. abs 0.8341093475120424
----------------------------------------
|               Time |            16.3 |
|          Iteration |              36 |
|      AverageReturn |       -1.31e+03 |
|          StdReturn |            49.4 |
|          MaxReturn |        -1.2e+03 |
|          MinReturn |       -1.34e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        4.44e+04 |
|               Time |            16.3 |
|          Iteration |              36 |
|      AverageReturn |       -1.31e+03 |
|          StdReturn |            49.4 |
|          MaxReturn |        -1.2e+03 |
|          MinReturn |       -1.34e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        4.44e+04 |
|               Time |            16.3 |
|          Iteration |              36 |


avg action -0.5811733503231096. abs 0.6615383663028264
----------------------------------------
|               Time |              18 |
|          Iteration |              40 |
|      AverageReturn |       -1.44e+03 |
|          StdReturn |             176 |
|          MaxReturn |       -1.34e+03 |
|          MinReturn |       -1.83e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        4.92e+04 |
|               Time |              18 |
|          Iteration |              40 |
|      AverageReturn |       -1.44e+03 |
|          StdReturn |             176 |
|          MaxReturn |       -1.34e+03 |
|          MinReturn |       -1.83e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        4.92e+04 |
|               Time |              18 |
|          Iteration |              40 |
| 

avg action -0.11464258115776196. abs 0.8057730229860266
----------------------------------------
|               Time |              20 |
|          Iteration |              44 |
|      AverageReturn |       -1.32e+03 |
|          StdReturn |            56.6 |
|          MaxReturn |       -1.22e+03 |
|          MinReturn |       -1.39e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         5.4e+04 |
|               Time |              20 |
|          Iteration |              44 |
|      AverageReturn |       -1.32e+03 |
|          StdReturn |            56.6 |
|          MaxReturn |       -1.22e+03 |
|          MinReturn |       -1.39e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         5.4e+04 |
|               Time |              20 |
|          Iteration |              44 |
|

avg action 0.6623318932135026. abs 0.9536434878170299
----------------------------------------
|               Time |            21.8 |
|          Iteration |              48 |
|      AverageReturn |       -1.35e+03 |
|          StdReturn |            28.8 |
|          MaxReturn |        -1.3e+03 |
|          MinReturn |       -1.38e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        5.88e+04 |
|               Time |            21.8 |
|          Iteration |              48 |
|      AverageReturn |       -1.35e+03 |
|          StdReturn |            28.8 |
|          MaxReturn |        -1.3e+03 |
|          MinReturn |       -1.38e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        5.88e+04 |
|               Time |            21.8 |
|          Iteration |              48 |
|  

avg action 0.6271178384214192. abs 0.973568170930796
----------------------------------------
|               Time |            23.5 |
|          Iteration |              52 |
|      AverageReturn |       -1.36e+03 |
|          StdReturn |            23.2 |
|          MaxReturn |       -1.34e+03 |
|          MinReturn |       -1.41e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        6.36e+04 |
|               Time |            23.5 |
|          Iteration |              52 |
|      AverageReturn |       -1.36e+03 |
|          StdReturn |            23.2 |
|          MaxReturn |       -1.34e+03 |
|          MinReturn |       -1.41e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        6.36e+04 |
|               Time |            23.5 |
|          Iteration |              52 |
|   

avg action 0.09705689699843298. abs 0.9138321975320783
----------------------------------------
|               Time |            25.2 |
|          Iteration |              56 |
|      AverageReturn |       -1.34e+03 |
|          StdReturn |            55.6 |
|          MaxReturn |       -1.24e+03 |
|          MinReturn |       -1.42e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        6.84e+04 |
|               Time |            25.2 |
|          Iteration |              56 |
|      AverageReturn |       -1.34e+03 |
|          StdReturn |            55.6 |
|          MaxReturn |       -1.24e+03 |
|          MinReturn |       -1.42e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        6.84e+04 |
|               Time |            25.2 |
|          Iteration |              56 |
| 

avg action 0.2288956374408258. abs 0.9334025576029555
----------------------------------------
|               Time |              27 |
|          Iteration |              60 |
|      AverageReturn |       -1.37e+03 |
|          StdReturn |            28.6 |
|          MaxReturn |       -1.34e+03 |
|          MinReturn |       -1.42e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        7.32e+04 |
|               Time |              27 |
|          Iteration |              60 |
|      AverageReturn |       -1.37e+03 |
|          StdReturn |            28.6 |
|          MaxReturn |       -1.34e+03 |
|          MinReturn |       -1.42e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        7.32e+04 |
|               Time |              27 |
|          Iteration |              60 |
|  

avg action -0.21646410762378565. abs 0.8520660026402506
----------------------------------------
|               Time |            28.7 |
|          Iteration |              64 |
|      AverageReturn |       -1.35e+03 |
|          StdReturn |            56.9 |
|          MaxReturn |       -1.25e+03 |
|          MinReturn |       -1.41e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         7.8e+04 |
|               Time |            28.7 |
|          Iteration |              64 |
|      AverageReturn |       -1.35e+03 |
|          StdReturn |            56.9 |
|          MaxReturn |       -1.25e+03 |
|          MinReturn |       -1.41e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         7.8e+04 |
|               Time |            28.7 |
|          Iteration |              64 |
|

avg action 0.10052629961074899. abs 1.0756577901133788
----------------------------------------
|               Time |            30.4 |
|          Iteration |              68 |
|      AverageReturn |       -1.39e+03 |
|          StdReturn |            37.5 |
|          MaxReturn |       -1.36e+03 |
|          MinReturn |       -1.46e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        8.28e+04 |
|               Time |            30.4 |
|          Iteration |              68 |
|      AverageReturn |       -1.39e+03 |
|          StdReturn |            37.5 |
|          MaxReturn |       -1.36e+03 |
|          MinReturn |       -1.46e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        8.28e+04 |
|               Time |            30.4 |
|          Iteration |              68 |
| 

avg action 0.07676573234693447. abs 1.0900002711206798
----------------------------------------
|               Time |            32.2 |
|          Iteration |              72 |
|      AverageReturn |       -1.43e+03 |
|          StdReturn |            16.6 |
|          MaxReturn |       -1.41e+03 |
|          MinReturn |       -1.45e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        8.76e+04 |
|               Time |            32.2 |
|          Iteration |              72 |
|      AverageReturn |       -1.43e+03 |
|          StdReturn |            16.6 |
|          MaxReturn |       -1.41e+03 |
|          MinReturn |       -1.45e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        8.76e+04 |
|               Time |            32.2 |
|          Iteration |              72 |
| 

avg action 0.15788716262570446. abs 1.1344413484622788
----------------------------------------
|               Time |            33.9 |
|          Iteration |              76 |
|      AverageReturn |       -1.41e+03 |
|          StdReturn |              60 |
|          MaxReturn |       -1.34e+03 |
|          MinReturn |       -1.49e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        9.24e+04 |
|               Time |            33.9 |
|          Iteration |              76 |
|      AverageReturn |       -1.41e+03 |
|          StdReturn |              60 |
|          MaxReturn |       -1.34e+03 |
|          MinReturn |       -1.49e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        9.24e+04 |
|               Time |            33.9 |
|          Iteration |              76 |
| 

avg action -0.6724714302957719. abs 1.0777097424311113
----------------------------------------
|               Time |            35.7 |
|          Iteration |              80 |
|      AverageReturn |       -1.43e+03 |
|          StdReturn |            56.4 |
|          MaxReturn |       -1.31e+03 |
|          MinReturn |       -1.49e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        9.72e+04 |
|               Time |            35.7 |
|          Iteration |              80 |
|      AverageReturn |       -1.43e+03 |
|          StdReturn |            56.4 |
|          MaxReturn |       -1.31e+03 |
|          MinReturn |       -1.49e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        9.72e+04 |
|               Time |            35.7 |
|          Iteration |              80 |
| 

avg action 0.7599669141520163. abs 1.1814045011615795
----------------------------------------
|               Time |            37.4 |
|          Iteration |              84 |
|      AverageReturn |       -1.45e+03 |
|          StdReturn |            16.9 |
|          MaxReturn |       -1.43e+03 |
|          MinReturn |       -1.48e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.02e+05 |
|               Time |            37.4 |
|          Iteration |              84 |
|      AverageReturn |       -1.45e+03 |
|          StdReturn |            16.9 |
|          MaxReturn |       -1.43e+03 |
|          MinReturn |       -1.48e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.02e+05 |
|               Time |            37.4 |
|          Iteration |              84 |
|  

avg action -0.350098395259621. abs 1.2600996306558836
----------------------------------------
|               Time |            39.1 |
|          Iteration |              88 |
|      AverageReturn |       -1.46e+03 |
|          StdReturn |            37.2 |
|          MaxReturn |       -1.42e+03 |
|          MinReturn |       -1.51e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.07e+05 |
|               Time |            39.1 |
|          Iteration |              88 |
|      AverageReturn |       -1.46e+03 |
|          StdReturn |            37.2 |
|          MaxReturn |       -1.42e+03 |
|          MinReturn |       -1.51e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.07e+05 |
|               Time |            39.1 |
|          Iteration |              88 |
|  

avg action -0.30046645839172553. abs 1.3434620923116356
----------------------------------------
|               Time |            40.9 |
|          Iteration |              92 |
|      AverageReturn |       -1.46e+03 |
|          StdReturn |            56.1 |
|          MaxReturn |       -1.35e+03 |
|          MinReturn |       -1.51e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.12e+05 |
|               Time |            40.9 |
|          Iteration |              92 |
|      AverageReturn |       -1.46e+03 |
|          StdReturn |            56.1 |
|          MaxReturn |       -1.35e+03 |
|          MinReturn |       -1.51e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.12e+05 |
|               Time |            40.9 |
|          Iteration |              92 |
|

avg action -0.2981431015002496. abs 1.3021074810258184
----------------------------------------
|               Time |            42.6 |
|          Iteration |              96 |
|      AverageReturn |       -1.48e+03 |
|          StdReturn |            7.49 |
|          MaxReturn |       -1.47e+03 |
|          MinReturn |       -1.49e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.16e+05 |
|               Time |            42.6 |
|          Iteration |              96 |
|      AverageReturn |       -1.48e+03 |
|          StdReturn |            7.49 |
|          MaxReturn |       -1.47e+03 |
|          MinReturn |       -1.49e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.16e+05 |
|               Time |            42.6 |
|          Iteration |              96 |
| 

[2017-12-12 00:27:09,059] Making new env: Pendulum-v0


avg action -0.3416399760033123. abs 1.351464784222965
----------------------------------------
|               Time |            43.9 |
|          Iteration |              99 |
|      AverageReturn |        -1.5e+03 |
|          StdReturn |            17.3 |
|          MaxReturn |       -1.47e+03 |
|          MinReturn |       -1.52e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         1.2e+05 |
|               Time |            43.9 |
|          Iteration |              99 |
|      AverageReturn |        -1.5e+03 |
|          StdReturn |            17.3 |
|          MaxReturn |       -1.47e+03 |
|          MinReturn |       -1.52e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         1.2e+05 |
|               Time |            43.9 |
|          Iteration |              99 |
|  

avg action 0.021403391144967428. abs 0.1003109247783572
----------------------------------------
|               Time |            1.89 |
|          Iteration |               3 |
|      AverageReturn |       -1.49e+03 |
|          StdReturn |             337 |
|          MaxReturn |            -965 |
|          MinReturn |       -1.92e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         4.8e+03 |
|               Time |            1.89 |
|          Iteration |               3 |
|      AverageReturn |       -1.49e+03 |
|          StdReturn |             337 |
|          MaxReturn |            -965 |
|          MinReturn |       -1.92e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         4.8e+03 |
|               Time |            1.89 |
|          Iteration |               3 |
|

avg action 0.024395589721175726. abs 0.10394289058699266
----------------------------------------
|               Time |            3.64 |
|          Iteration |               7 |
|      AverageReturn |       -1.38e+03 |
|          StdReturn |             366 |
|          MaxReturn |            -974 |
|          MinReturn |       -1.88e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         9.6e+03 |
|               Time |            3.64 |
|          Iteration |               7 |
|      AverageReturn |       -1.38e+03 |
|          StdReturn |             366 |
|          MaxReturn |            -974 |
|          MinReturn |       -1.88e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         9.6e+03 |
|               Time |            3.64 |
|          Iteration |               7 |


avg action 0.03595554971416636. abs 0.11225437141463793
----------------------------------------
|               Time |            5.34 |
|          Iteration |              11 |
|      AverageReturn |       -1.37e+03 |
|          StdReturn |             288 |
|          MaxReturn |            -970 |
|          MinReturn |       -1.81e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.44e+04 |
|               Time |            5.34 |
|          Iteration |              11 |
|      AverageReturn |       -1.37e+03 |
|          StdReturn |             288 |
|          MaxReturn |            -970 |
|          MinReturn |       -1.81e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.44e+04 |
|               Time |            5.34 |
|          Iteration |              11 |
|

avg action 0.024762004622223534. abs 0.06973354329072379
----------------------------------------
|               Time |            7.07 |
|          Iteration |              15 |
|      AverageReturn |       -1.36e+03 |
|          StdReturn |             510 |
|          MaxReturn |            -631 |
|          MinReturn |       -1.94e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.92e+04 |
|               Time |            7.07 |
|          Iteration |              15 |
|      AverageReturn |       -1.36e+03 |
|          StdReturn |             510 |
|          MaxReturn |            -631 |
|          MinReturn |       -1.94e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.92e+04 |
|               Time |            7.07 |
|          Iteration |              15 |


avg action 0.02240625613069697. abs 0.09756862359052314
----------------------------------------
|               Time |             8.8 |
|          Iteration |              19 |
|      AverageReturn |       -1.22e+03 |
|          StdReturn |             253 |
|          MaxReturn |            -894 |
|          MinReturn |       -1.58e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         2.4e+04 |
|               Time |             8.8 |
|          Iteration |              19 |
|      AverageReturn |       -1.22e+03 |
|          StdReturn |             253 |
|          MaxReturn |            -894 |
|          MinReturn |       -1.58e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         2.4e+04 |
|               Time |             8.8 |
|          Iteration |              19 |
|

avg action -0.023055279493883016. abs 0.13414716299135762
----------------------------------------
|               Time |            10.5 |
|          Iteration |              23 |
|      AverageReturn |       -1.17e+03 |
|          StdReturn |             329 |
|          MaxReturn |            -866 |
|          MinReturn |       -1.81e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        2.88e+04 |
|               Time |            10.5 |
|          Iteration |              23 |
|      AverageReturn |       -1.17e+03 |
|          StdReturn |             329 |
|          MaxReturn |            -866 |
|          MinReturn |       -1.81e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        2.88e+04 |
|               Time |            10.5 |
|          Iteration |              23 |

avg action -0.16488060890902023. abs 0.2260434000339139
----------------------------------------
|               Time |            12.3 |
|          Iteration |              27 |
|      AverageReturn |       -1.13e+03 |
|          StdReturn |            90.5 |
|          MaxReturn |       -1.06e+03 |
|          MinReturn |       -1.31e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        3.36e+04 |
|               Time |            12.3 |
|          Iteration |              27 |
|      AverageReturn |       -1.13e+03 |
|          StdReturn |            90.5 |
|          MaxReturn |       -1.06e+03 |
|          MinReturn |       -1.31e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        3.36e+04 |
|               Time |            12.3 |
|          Iteration |              27 |
|

avg action 0.0871874085624928. abs 0.1785129302896931
----------------------------------------
|               Time |              14 |
|          Iteration |              31 |
|      AverageReturn |        -1.1e+03 |
|          StdReturn |             189 |
|          MaxReturn |            -984 |
|          MinReturn |       -1.52e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        3.84e+04 |
|               Time |              14 |
|          Iteration |              31 |
|      AverageReturn |        -1.1e+03 |
|          StdReturn |             189 |
|          MaxReturn |            -984 |
|          MinReturn |       -1.52e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        3.84e+04 |
|               Time |              14 |
|          Iteration |              31 |
|  

avg action -0.018309721729106686. abs 0.2109469084178007
----------------------------------------
|               Time |            15.7 |
|          Iteration |              35 |
|      AverageReturn |       -1.25e+03 |
|          StdReturn |             169 |
|          MaxReturn |       -1.05e+03 |
|          MinReturn |        -1.5e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        4.32e+04 |
|               Time |            15.7 |
|          Iteration |              35 |
|      AverageReturn |       -1.25e+03 |
|          StdReturn |             169 |
|          MaxReturn |       -1.05e+03 |
|          MinReturn |        -1.5e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        4.32e+04 |
|               Time |            15.7 |
|          Iteration |              35 |


avg action -0.05018577747819251. abs 0.2499306007924159
----------------------------------------
|               Time |            17.5 |
|          Iteration |              39 |
|      AverageReturn |       -1.36e+03 |
|          StdReturn |             221 |
|          MaxReturn |       -1.12e+03 |
|          MinReturn |       -1.68e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         4.8e+04 |
|               Time |            17.5 |
|          Iteration |              39 |
|      AverageReturn |       -1.36e+03 |
|          StdReturn |             221 |
|          MaxReturn |       -1.12e+03 |
|          MinReturn |       -1.68e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         4.8e+04 |
|               Time |            17.5 |
|          Iteration |              39 |
|

avg action -0.10660298841405146. abs 0.3126676571362575
----------------------------------------
|               Time |            19.2 |
|          Iteration |              43 |
|      AverageReturn |       -1.12e+03 |
|          StdReturn |              32 |
|          MaxReturn |       -1.09e+03 |
|          MinReturn |       -1.18e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        5.28e+04 |
|               Time |            19.2 |
|          Iteration |              43 |
|      AverageReturn |       -1.12e+03 |
|          StdReturn |              32 |
|          MaxReturn |       -1.09e+03 |
|          MinReturn |       -1.18e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        5.28e+04 |
|               Time |            19.2 |
|          Iteration |              43 |
|

avg action -0.008444158497047417. abs 0.21414008992314099
----------------------------------------
|               Time |            20.9 |
|          Iteration |              47 |
|      AverageReturn |       -1.32e+03 |
|          StdReturn |             243 |
|          MaxReturn |       -1.05e+03 |
|          MinReturn |        -1.7e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        5.76e+04 |
|               Time |            20.9 |
|          Iteration |              47 |
|      AverageReturn |       -1.32e+03 |
|          StdReturn |             243 |
|          MaxReturn |       -1.05e+03 |
|          MinReturn |        -1.7e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        5.76e+04 |
|               Time |            20.9 |
|          Iteration |              47 |

avg action -0.14640204821583008. abs 0.28657101071195956
----------------------------------------
|               Time |            22.7 |
|          Iteration |              51 |
|      AverageReturn |       -1.18e+03 |
|          StdReturn |             120 |
|          MaxReturn |       -1.01e+03 |
|          MinReturn |       -1.33e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        6.24e+04 |
|               Time |            22.7 |
|          Iteration |              51 |
|      AverageReturn |       -1.18e+03 |
|          StdReturn |             120 |
|          MaxReturn |       -1.01e+03 |
|          MinReturn |       -1.33e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        6.24e+04 |
|               Time |            22.7 |
|          Iteration |              51 |


avg action -0.1317329750373998. abs 0.26539651463975816
----------------------------------------
|               Time |            24.4 |
|          Iteration |              55 |
|      AverageReturn |       -1.24e+03 |
|          StdReturn |             182 |
|          MaxReturn |        -1.1e+03 |
|          MinReturn |        -1.6e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        6.72e+04 |
|               Time |            24.4 |
|          Iteration |              55 |
|      AverageReturn |       -1.24e+03 |
|          StdReturn |             182 |
|          MaxReturn |        -1.1e+03 |
|          MinReturn |        -1.6e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        6.72e+04 |
|               Time |            24.4 |
|          Iteration |              55 |
|

avg action -0.17604885906072948. abs 0.3266945577906493
----------------------------------------
|               Time |            26.2 |
|          Iteration |              59 |
|      AverageReturn |       -1.22e+03 |
|          StdReturn |             173 |
|          MaxReturn |            -958 |
|          MinReturn |       -1.52e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         7.2e+04 |
|               Time |            26.2 |
|          Iteration |              59 |
|      AverageReturn |       -1.22e+03 |
|          StdReturn |             173 |
|          MaxReturn |            -958 |
|          MinReturn |       -1.52e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         7.2e+04 |
|               Time |            26.2 |
|          Iteration |              59 |
|

avg action 0.08568192134981284. abs 0.2976654446999946
----------------------------------------
|               Time |            27.9 |
|          Iteration |              63 |
|      AverageReturn |       -1.27e+03 |
|          StdReturn |             218 |
|          MaxReturn |       -1.04e+03 |
|          MinReturn |        -1.6e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        7.68e+04 |
|               Time |            27.9 |
|          Iteration |              63 |
|      AverageReturn |       -1.27e+03 |
|          StdReturn |             218 |
|          MaxReturn |       -1.04e+03 |
|          MinReturn |        -1.6e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        7.68e+04 |
|               Time |            27.9 |
|          Iteration |              63 |
| 

avg action -0.13569313806294672. abs 0.45916865492185216
----------------------------------------
|               Time |            29.6 |
|          Iteration |              67 |
|      AverageReturn |       -1.16e+03 |
|          StdReturn |            19.8 |
|          MaxReturn |       -1.13e+03 |
|          MinReturn |       -1.18e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        8.16e+04 |
|               Time |            29.6 |
|          Iteration |              67 |
|      AverageReturn |       -1.16e+03 |
|          StdReturn |            19.8 |
|          MaxReturn |       -1.13e+03 |
|          MinReturn |       -1.18e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        8.16e+04 |
|               Time |            29.6 |
|          Iteration |              67 |


avg action 0.0036995459718200153. abs 0.44193228170917287
----------------------------------------
|               Time |            31.4 |
|          Iteration |              71 |
|      AverageReturn |       -1.16e+03 |
|          StdReturn |            38.3 |
|          MaxReturn |       -1.09e+03 |
|          MinReturn |       -1.21e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        8.64e+04 |
|               Time |            31.4 |
|          Iteration |              71 |
|      AverageReturn |       -1.16e+03 |
|          StdReturn |            38.3 |
|          MaxReturn |       -1.09e+03 |
|          MinReturn |       -1.21e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        8.64e+04 |
|               Time |            31.4 |
|          Iteration |              71 |

avg action -0.05403709615393303. abs 0.3387796470366063
----------------------------------------
|               Time |            33.3 |
|          Iteration |              75 |
|      AverageReturn |       -1.25e+03 |
|          StdReturn |             219 |
|          MaxReturn |       -1.04e+03 |
|          MinReturn |       -1.72e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        9.12e+04 |
|               Time |            33.3 |
|          Iteration |              75 |
|      AverageReturn |       -1.25e+03 |
|          StdReturn |             219 |
|          MaxReturn |       -1.04e+03 |
|          MinReturn |       -1.72e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        9.12e+04 |
|               Time |            33.3 |
|          Iteration |              75 |
|

avg action -0.028884833405283233. abs 0.32737363277906467
----------------------------------------
|               Time |            36.5 |
|          Iteration |              79 |
|      AverageReturn |       -1.12e+03 |
|          StdReturn |            30.4 |
|          MaxReturn |       -1.09e+03 |
|          MinReturn |       -1.16e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         9.6e+04 |
|               Time |            36.5 |
|          Iteration |              79 |
|      AverageReturn |       -1.12e+03 |
|          StdReturn |            30.4 |
|          MaxReturn |       -1.09e+03 |
|          MinReturn |       -1.16e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         9.6e+04 |
|               Time |            36.5 |
|          Iteration |              79 |

avg action -0.09595007151067987. abs 0.3347906397626932
----------------------------------------
|               Time |              39 |
|          Iteration |              83 |
|      AverageReturn |       -1.11e+03 |
|          StdReturn |            49.7 |
|          MaxReturn |       -1.04e+03 |
|          MinReturn |        -1.2e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.01e+05 |
|               Time |              39 |
|          Iteration |              83 |
|      AverageReturn |       -1.11e+03 |
|          StdReturn |            49.7 |
|          MaxReturn |       -1.04e+03 |
|          MinReturn |        -1.2e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.01e+05 |
|               Time |              39 |
|          Iteration |              83 |
|

avg action -0.009529642323038144. abs 0.2877489167548749
----------------------------------------
|               Time |            41.4 |
|          Iteration |              87 |
|      AverageReturn |       -1.24e+03 |
|          StdReturn |             210 |
|          MaxReturn |       -1.05e+03 |
|          MinReturn |       -1.63e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.06e+05 |
|               Time |            41.4 |
|          Iteration |              87 |
|      AverageReturn |       -1.24e+03 |
|          StdReturn |             210 |
|          MaxReturn |       -1.05e+03 |
|          MinReturn |       -1.63e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.06e+05 |
|               Time |            41.4 |
|          Iteration |              87 |


avg action 0.02559885882863005. abs 0.2738733131523066
----------------------------------------
|               Time |            43.7 |
|          Iteration |              91 |
|      AverageReturn |       -1.24e+03 |
|          StdReturn |             280 |
|          MaxReturn |       -1.04e+03 |
|          MinReturn |       -1.85e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         1.1e+05 |
|               Time |            43.7 |
|          Iteration |              91 |
|      AverageReturn |       -1.24e+03 |
|          StdReturn |             280 |
|          MaxReturn |       -1.04e+03 |
|          MinReturn |       -1.85e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         1.1e+05 |
|               Time |            43.7 |
|          Iteration |              91 |
| 

avg action -0.13454697782733882. abs 0.32849419243784655
----------------------------------------
|               Time |            45.9 |
|          Iteration |              95 |
|      AverageReturn |       -1.19e+03 |
|          StdReturn |            94.2 |
|          MaxReturn |       -1.05e+03 |
|          MinReturn |       -1.31e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.15e+05 |
|               Time |            45.9 |
|          Iteration |              95 |
|      AverageReturn |       -1.19e+03 |
|          StdReturn |            94.2 |
|          MaxReturn |       -1.05e+03 |
|          MinReturn |       -1.31e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.15e+05 |
|               Time |            45.9 |
|          Iteration |              95 |


[2017-12-12 00:27:57,085] Making new env: Pendulum-v0


avg action -0.05225498676746124. abs 0.25256138583972415
----------------------------------------
|               Time |            48.1 |
|          Iteration |              99 |
|      AverageReturn |       -1.35e+03 |
|          StdReturn |             188 |
|          MaxReturn |       -1.12e+03 |
|          MinReturn |        -1.6e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         1.2e+05 |
|               Time |            48.1 |
|          Iteration |              99 |
|      AverageReturn |       -1.35e+03 |
|          StdReturn |             188 |
|          MaxReturn |       -1.12e+03 |
|          MinReturn |        -1.6e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         1.2e+05 |
|               Time |            48.1 |
|          Iteration |              99 |


avg action -0.16290101608120708. abs 0.32355868696439194
----------------------------------------
|               Time |            1.41 |
|          Iteration |               2 |
|      AverageReturn |        -1.6e+03 |
|          StdReturn |             128 |
|          MaxReturn |       -1.48e+03 |
|          MinReturn |       -1.87e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         3.6e+03 |
|               Time |            1.41 |
|          Iteration |               2 |
|      AverageReturn |        -1.6e+03 |
|          StdReturn |             128 |
|          MaxReturn |       -1.48e+03 |
|          MinReturn |       -1.87e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         3.6e+03 |
|               Time |            1.41 |
|          Iteration |               2 |


avg action -0.25162598626171084. abs 0.30730457626439545
----------------------------------------
|               Time |            3.29 |
|          Iteration |               5 |
|      AverageReturn |       -1.74e+03 |
|          StdReturn |             109 |
|          MaxReturn |       -1.55e+03 |
|          MinReturn |       -1.86e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         7.2e+03 |
|               Time |            3.29 |
|          Iteration |               5 |
|      AverageReturn |       -1.74e+03 |
|          StdReturn |             109 |
|          MaxReturn |       -1.55e+03 |
|          MinReturn |       -1.86e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         7.2e+03 |
|               Time |            3.29 |
|          Iteration |               5 |


avg action -0.270000500530364. abs 0.3902656971762889
----------------------------------------
|               Time |            5.21 |
|          Iteration |               8 |
|      AverageReturn |       -1.58e+03 |
|          StdReturn |             141 |
|          MaxReturn |        -1.4e+03 |
|          MinReturn |       -1.82e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.08e+04 |
|               Time |            5.21 |
|          Iteration |               8 |
|      AverageReturn |       -1.58e+03 |
|          StdReturn |             141 |
|          MaxReturn |        -1.4e+03 |
|          MinReturn |       -1.82e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.08e+04 |
|               Time |            5.21 |
|          Iteration |               8 |
|  

avg action -0.3014375673289183. abs 0.37211208142084895
----------------------------------------
|               Time |            6.68 |
|          Iteration |              11 |
|      AverageReturn |       -1.67e+03 |
|          StdReturn |             143 |
|          MaxReturn |       -1.49e+03 |
|          MinReturn |       -1.89e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.44e+04 |
|               Time |            6.68 |
|          Iteration |              11 |
|      AverageReturn |       -1.67e+03 |
|          StdReturn |             143 |
|          MaxReturn |       -1.49e+03 |
|          MinReturn |       -1.89e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        1.44e+04 |
|               Time |            6.68 |
|          Iteration |              11 |
|

avg action -0.3366784542899384. abs 0.3659003369575829
----------------------------------------
|               Time |            7.99 |
|          Iteration |              14 |
|      AverageReturn |       -1.74e+03 |
|          StdReturn |             102 |
|          MaxReturn |       -1.55e+03 |
|          MinReturn |       -1.89e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         1.8e+04 |
|               Time |            7.99 |
|          Iteration |              14 |
|      AverageReturn |       -1.74e+03 |
|          StdReturn |             102 |
|          MaxReturn |       -1.55e+03 |
|          MinReturn |       -1.89e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         1.8e+04 |
|               Time |            7.99 |
|          Iteration |              14 |
| 

avg action -0.3189625454133142. abs 0.3807793351650626
----------------------------------------
|               Time |            9.46 |
|          Iteration |              17 |
|      AverageReturn |       -1.71e+03 |
|          StdReturn |             143 |
|          MaxReturn |       -1.43e+03 |
|          MinReturn |       -1.87e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        2.16e+04 |
|               Time |            9.46 |
|          Iteration |              17 |
|      AverageReturn |       -1.71e+03 |
|          StdReturn |             143 |
|          MaxReturn |       -1.43e+03 |
|          MinReturn |       -1.87e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        2.16e+04 |
|               Time |            9.46 |
|          Iteration |              17 |
| 

avg action -0.3385557394547409. abs 0.4115529380219974
----------------------------------------
|               Time |            11.2 |
|          Iteration |              20 |
|      AverageReturn |       -1.66e+03 |
|          StdReturn |             117 |
|          MaxReturn |       -1.56e+03 |
|          MinReturn |       -1.87e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        2.52e+04 |
|               Time |            11.2 |
|          Iteration |              20 |
|      AverageReturn |       -1.66e+03 |
|          StdReturn |             117 |
|          MaxReturn |       -1.56e+03 |
|          MinReturn |       -1.87e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        2.52e+04 |
|               Time |            11.2 |
|          Iteration |              20 |
| 

avg action -0.276122321764498. abs 0.4209419196726622
----------------------------------------
|               Time |            12.7 |
|          Iteration |              23 |
|      AverageReturn |       -1.55e+03 |
|          StdReturn |             108 |
|          MaxReturn |       -1.46e+03 |
|          MinReturn |       -1.77e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        2.88e+04 |
|               Time |            12.7 |
|          Iteration |              23 |
|      AverageReturn |       -1.55e+03 |
|          StdReturn |             108 |
|          MaxReturn |       -1.46e+03 |
|          MinReturn |       -1.77e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        2.88e+04 |
|               Time |            12.7 |
|          Iteration |              23 |
|  

avg action -0.28359926815142783. abs 0.3551587462858807
----------------------------------------
|               Time |            14.7 |
|          Iteration |              26 |
|      AverageReturn |       -1.73e+03 |
|          StdReturn |            97.3 |
|          MaxReturn |       -1.53e+03 |
|          MinReturn |       -1.85e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        3.24e+04 |
|               Time |            14.7 |
|          Iteration |              26 |
|      AverageReturn |       -1.73e+03 |
|          StdReturn |            97.3 |
|          MaxReturn |       -1.53e+03 |
|          MinReturn |       -1.85e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        3.24e+04 |
|               Time |            14.7 |
|          Iteration |              26 |
|

avg action -0.2844000752557504. abs 0.35894051989503434
----------------------------------------
|               Time |            16.3 |
|          Iteration |              29 |
|      AverageReturn |       -1.73e+03 |
|          StdReturn |             131 |
|          MaxReturn |       -1.56e+03 |
|          MinReturn |       -1.89e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         3.6e+04 |
|               Time |            16.3 |
|          Iteration |              29 |
|      AverageReturn |       -1.73e+03 |
|          StdReturn |             131 |
|          MaxReturn |       -1.56e+03 |
|          MinReturn |       -1.89e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |         3.6e+04 |
|               Time |            16.3 |
|          Iteration |              29 |
|

avg action -0.2875314992650927. abs 0.35399720921331695
----------------------------------------
|               Time |            17.6 |
|          Iteration |              32 |
|      AverageReturn |       -1.71e+03 |
|          StdReturn |             157 |
|          MaxReturn |       -1.43e+03 |
|          MinReturn |       -1.86e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        3.96e+04 |
|               Time |            17.6 |
|          Iteration |              32 |
|      AverageReturn |       -1.71e+03 |
|          StdReturn |             157 |
|          MaxReturn |       -1.43e+03 |
|          MinReturn |       -1.86e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        3.96e+04 |
|               Time |            17.6 |
|          Iteration |              32 |
|

avg action -0.25192006856862387. abs 0.3636417189131537
----------------------------------------
|               Time |              19 |
|          Iteration |              35 |
|      AverageReturn |       -1.59e+03 |
|          StdReturn |             119 |
|          MaxReturn |       -1.45e+03 |
|          MinReturn |       -1.81e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        4.32e+04 |
|               Time |              19 |
|          Iteration |              35 |
|      AverageReturn |       -1.59e+03 |
|          StdReturn |             119 |
|          MaxReturn |       -1.45e+03 |
|          MinReturn |       -1.81e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        4.32e+04 |
|               Time |              19 |
|          Iteration |              35 |
|

avg action -0.24588974558391327. abs 0.33423379493977895
----------------------------------------
|               Time |            20.4 |
|          Iteration |              38 |
|      AverageReturn |       -1.63e+03 |
|          StdReturn |             105 |
|          MaxReturn |       -1.53e+03 |
|          MinReturn |       -1.83e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        4.68e+04 |
|               Time |            20.4 |
|          Iteration |              38 |
|      AverageReturn |       -1.63e+03 |
|          StdReturn |             105 |
|          MaxReturn |       -1.53e+03 |
|          MinReturn |       -1.83e+03 |
|          EpLenMean |             200 |
|           EpLenStd |               0 |
| TimestepsThisBatch |         1.2e+03 |
|     TimestepsSoFar |        4.68e+04 |
|               Time |            20.4 |
|          Iteration |              38 |


KeyboardInterrupt: 

In [None]:
%debug

In [None]:
env.action_space.sample()