In [1]:
import gym_electric_motor as gem
import gym
import tensorflow as tf
from tf_agents.environments import suite_gym
from tf_agents.environments import tf_py_environment
from tf_agents.networks import q_network
from gym.wrappers import FlattenObservation #, TimeLimit
from tf_agents.environments.wrappers import TimeLimit
from tf_agents.agents.dqn import dqn_agent
from tf_agents.utils import common

from gym_electric_motor.visualization import MotorDashboard, ConsolePrinter
from gym_electric_motor.physical_systems import ConstantSpeedLoad

import numpy as np



In [2]:
gamma = 0.99
tau = 1e-5

class SqdCurrentMonitor:
    """
    monitor for squared currents:
    
    i_sd**2 + i_sq**2 < 1.5 * nominal_limit 
    """
    
    def __call__(self, state, observed_states, k, physical_system):
        self.I_SD_IDX = physical_system.state_names.index('i_sd')
        self.I_SQ_IDX = physical_system.state_names.index('i_sq')
        # normalize to limit_values, since state is normalized
        nominal_values = physical_system.nominal_state / abs(physical_system.limits)
        limits = 1.5 * nominal_values
        # calculating squared currents as observed measure 
        sqd_currents = state[self.I_SD_IDX]**2 + state[self.I_SQ_IDX]**2
        
        return (sqd_currents > limits[self.I_SD_IDX] or sqd_currents > limits[self.I_SQ_IDX])
    
    
motor_parameter = dict(p=3,  # [p] = 1, nb of pole pairs
                       r_s=17.932e-3,  # [r_s] = Ohm, stator resistance
                       l_d=0.37e-3,  # [l_d] = H, d-axis inductance
                       l_q=1.2e-3,  # [l_q] = H, q-axis inductance
                       psi_p=65.65e-3,  # [psi_p] = Vs, magnetic flux of the permanent magnet
                       )  # BRUSA
u_sup = 350

nominal_values=dict(omega=4000*2*np.pi/60,
                    i=230,
                    u=u_sup
                    )
limit_values=nominal_values.copy()

q_generator = gem.reference_generators.WienerProcessReferenceGenerator(reference_state='i_sq')
d_generator = gem.reference_generators.WienerProcessReferenceGenerator(reference_state='i_sd')
rg = gem.reference_generators.MultipleReferenceGenerator([q_generator, d_generator])

gym_env_kwargs1 = {'visualization': MotorDashboard(plots = ['i_sq', 'i_sd', 'reward']),
            # parameterize the PMSM
               'motor_parameter' : motor_parameter,
               'limit_values' : limit_values,
               'nominal_values' : nominal_values,
               'u_sup' : u_sup,
               'load' : ConstantSpeedLoad(omega_fixed=1000 * np.pi / 30),
           
               'tau' : tau ,
               'motor_initializer': {'random_init': 'uniform',},
                  
                  ## pass a reward function with a gamma!!  todo
            # turn off terminations via limit violation and parameterize the reward function
            'reward_function' : gem.reward_functions.WeightedSumOfErrors(observed_states=['i_sq', 'i_sd'], 
                                                                        reward_weights={'i_sq': 1, 'i_sd': 1},
                                                                        constraint_monitor = SqdCurrentMonitor(),
                                                                        gamma = gamma,
                                                                        reward_power=1
                                                                      ),
            
            'reference_generator' : rg,
            # define a numerical solver of adequate accuracy
            'ode_solver' : 'euler' #'scipy.solve_ivp'
    
}

gym_env_kwargs2 = {'visualization': MotorDashboard(plots = ['i_sq', 'i_sd', 'reward']),
            # parameterize the PMSM
               'motor_parameter' : motor_parameter,
               'limit_values' : limit_values,
               'nominal_values' : nominal_values,
               'u_sup' : u_sup,
               'load' : ConstantSpeedLoad(omega_fixed=1000 * np.pi / 30),
           
               'tau' : tau ,
               'motor_initializer': {'random_init': 'uniform',},
                  
                  ## pass a reward function with a gamma!!  todo
            # turn off terminations via limit violation and parameterize the reward function
            'reward_function' : gem.reward_functions.WeightedSumOfErrors(observed_states=['i_sq', 'i_sd'], 
                                                                        reward_weights={'i_sq': 1, 'i_sd': 1},
                                                                        constraint_monitor = SqdCurrentMonitor(),
                                                                        gamma = gamma,
                                                                        reward_power=1
                                                                      ),
            
            'reference_generator' : rg,
            # define a numerical solver of adequate accuracy
            'ode_solver' : 'euler' #'scipy.solve_ivp'
    
}




In [3]:
# # # Alternate way to create a tf compatible gym env

# t_env = gem.make("emotor-pmsm-disc-v1", **gym_env_kwargs2)   # define a PMSM with continuous action space
                
# t_env_f = FlattenObservation(t_env) 

# t_py_env = suite_gym.wrap_env(t_env_f, max_episode_steps=1000)
# t_tf_env = tf_py_environment.TFPyEnvironment(t_py_env)

In [4]:
env_name = "PMSMDisc-v1"
env = suite_gym.load(env_name, max_episode_steps=10000, gym_env_wrappers=[FlattenObservation],
                      gym_kwargs = gym_env_kwargs1 )  #
train_env = tf_py_environment.TFPyEnvironment(env)


# #env = FlattenObservation(env)
#eval_py_env =suite_gym.load(env_name, max_episode_steps=10000, gym_env_wrappers=[FlattenObservation],
#                      gym_kwargs = gym_env_kwargs2) #gem.make("emotor-pmsm-disc-v1", **gym_env_kwargs2)



#eval_tf_env = tf_py_environment.TFPyEnvironment(eval_py_env)


print(train_env)


<tf_agents.environments.tf_py_environment.TFPyEnvironment object at 0x7f1e4399f9b0>




In [5]:
# create a neural network:

fc_layer_params = (64,64)

q_net = q_network.QNetwork(
    train_env.observation_spec(),
    train_env.action_spec(),
    fc_layer_params=fc_layer_params)


In [6]:
# Hyper-parameters

num_iterations = 500000 

initial_collect_steps = 2000  
collect_steps_per_iteration = 1 
replay_buffer_max_length = 100000 

batch_size = 64  
learning_rate = 1e-4  
log_interval = 200 

num_eval_episodes = 10 
eval_interval = 1000

In [10]:
# instantiate DQN agent

optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)

train_step_counter = tf.Variable(0)

global_step = tf.compat.v1.train.get_or_create_global_step()
start_epsilon = 0.1
n_of_steps = int(0.9*num_iterations)
end_epsilon = 0.0001
epsilon = tf.compat.v1.train.polynomial_decay(
      start_epsilon,
      global_step,
      n_of_steps,
      end_learning_rate=end_epsilon)

agent = dqn_agent.DqnAgent(
    train_env.time_step_spec(),
    train_env.action_spec(),
    q_network=q_net,
    epsilon_greedy=epsilon,
    optimizer=optimizer,
    gamma=gamma,
    td_errors_loss_fn=common.element_wise_squared_loss,
    train_step_counter=train_step_counter)

agent.initialize()


In [11]:
class ShowProgress:
    def __init__(self, total):
        self.counter = 0
        self.total = total
    def __call__(self, trajectory):
        if not trajectory.is_boundary():
            self.counter += 1
        if self.counter % 100 == 0:
            print("\r{}/{}".format(self.counter, self.total), end="")

In [12]:
# create a replay buffer

from tf_agents.replay_buffers import tf_uniform_replay_buffer

replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
    data_spec=agent.collect_data_spec,
    batch_size=train_env.batch_size,
    max_length=replay_buffer_max_length)

replay_buffer_observer = replay_buffer.add_batch

In [13]:
from tf_agents.metrics import tf_metrics

train_metrics = [
    tf_metrics.NumberOfEpisodes(),
    tf_metrics.EnvironmentSteps(),
    tf_metrics.AverageReturnMetric(),
    tf_metrics.AverageEpisodeLengthMetric(),
]

In [14]:
from tf_agents.eval.metric_utils import log_metrics
import logging
log_metrics(train_metrics)
#logging.get_logger().set_level(logging.INFO)  
logging.getLogger().setLevel(logging.INFO)  

In [15]:
# Driver to take action steps in the environment
from tf_agents.drivers.dynamic_step_driver import DynamicStepDriver

collect_driver = DynamicStepDriver(
    train_env,
    agent.collect_policy,
    observers=[replay_buffer_observer] + train_metrics) 

In [16]:
# fill the replay buffer initially with trajectories from a random policy

from tf_agents.policies.random_tf_policy import RandomTFPolicy

initial_collect_policy = RandomTFPolicy(train_env.time_step_spec(),
                                        train_env.action_spec())
init_driver = DynamicStepDriver(
    train_env,
    initial_collect_policy,
    observers=[replay_buffer.add_batch, ShowProgress(initial_collect_steps)],
    num_steps=initial_collect_steps) 
final_time_step, final_policy_state = init_driver.run()

Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))


Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))


2000/2000

In [17]:
# from tf_agents.trajectories import trajectory

# def collect_step(environment, policy, buffer):
#   time_step = environment.current_time_step()
#   action_step = policy.action(time_step)
#   next_time_step = environment.step(action_step.action)
#   traj = trajectory.from_transition(time_step, action_step, next_time_step)

#   # Add trajectory to the replay buffer
#   buffer.add_batch(traj)

# def collect_data(env, policy, buffer, steps):
#   for _ in range(steps):
#     collect_step(env, policy, buffer)

# random_policy = RandomTFPolicy(train_env.time_step_spec(),
#                                                 train_env.action_spec())
# collect_data(train_env, random_policy, replay_buffer, steps=100)

In [18]:
# dataset is sampled from the replay buffer

dataset = replay_buffer.as_dataset(
    num_parallel_calls=3, 
    sample_batch_size=batch_size, 
    num_steps=2).prefetch(3)


dataset
iterator = iter(dataset)

print(iterator)

<tensorflow.python.data.ops.iterator_ops.OwnedIterator object at 0x7f1e1004dcc0>


In [19]:
from tf_agents.utils.common import function

agent.train = common.function(agent.train)
collect_driver.run = function(collect_driver.run)

# Reset the train step
agent.train_step_counter.assign(0)



time_step = None
policy_state = agent.collect_policy.get_initial_state(train_env.batch_size)
#iterator = iter(dataset)
loss = []
for iteration in range(num_iterations):
    time_step, policy_state = collect_driver.run(time_step, policy_state)
    trajectories, buffer_info = next(iterator)
    train_loss = agent.train(trajectories)
    loss.append(train_loss.loss.numpy())
    #t_env_f.render()
    print("\r{} loss:{:.5f}".format(
        iteration, train_loss.loss.numpy()), end="")
    if iteration % 1000 == 0:
        log_metrics(train_metrics)

INFO:absl: 
		 NumberOfEpisodes = 0
		 EnvironmentSteps = 1
		 AverageReturn = 0.0
		 AverageEpisodeLength = 0.0


939 loss:0.33009791

INFO:absl: 
		 NumberOfEpisodes = 6
		 EnvironmentSteps = 1001
		 AverageReturn = -273.6981506347656
		 AverageEpisodeLength = 157.1666717529297


1998 loss:1.51066442

INFO:absl: 
		 NumberOfEpisodes = 12
		 EnvironmentSteps = 2001
		 AverageReturn = -248.0764923095703
		 AverageEpisodeLength = 106.69999694824219


2988 loss:1.99862468

INFO:absl: 
		 NumberOfEpisodes = 14
		 EnvironmentSteps = 3001
		 AverageReturn = -286.3427429199219
		 AverageEpisodeLength = 210.89999389648438


3929 loss:2.11406755

INFO:absl: 
		 NumberOfEpisodes = 15
		 EnvironmentSteps = 4001
		 AverageReturn = -315.2497863769531
		 AverageEpisodeLength = 296.20001220703125


4989 loss:2.4199876

INFO:absl: 
		 NumberOfEpisodes = 18
		 EnvironmentSteps = 5001
		 AverageReturn = -339.6245422363281
		 AverageEpisodeLength = 325.5


5987 loss:408.504009

INFO:absl: 
		 NumberOfEpisodes = 18
		 EnvironmentSteps = 6001
		 AverageReturn = -339.6245422363281
		 AverageEpisodeLength = 325.5


6942 loss:484.84637

INFO:absl: 
		 NumberOfEpisodes = 18
		 EnvironmentSteps = 7001
		 AverageReturn = -339.6245422363281
		 AverageEpisodeLength = 325.5


7936 loss:1.9514264

INFO:absl: 
		 NumberOfEpisodes = 19
		 EnvironmentSteps = 8001
		 AverageReturn = -500.54034423828125
		 AverageEpisodeLength = 637.7000122070312


8994 loss:421.06818

INFO:absl: 
		 NumberOfEpisodes = 20
		 EnvironmentSteps = 9001
		 AverageReturn = -538.8235473632812
		 AverageEpisodeLength = 749.9000244140625


9988 loss:2.5519136

INFO:absl: 
		 NumberOfEpisodes = 22
		 EnvironmentSteps = 10001
		 AverageReturn = -573.0860595703125
		 AverageEpisodeLength = 830.2000122070312


10980 loss:1.9857923

INFO:absl: 
		 NumberOfEpisodes = 22
		 EnvironmentSteps = 11001
		 AverageReturn = -573.0860595703125
		 AverageEpisodeLength = 830.2000122070312


11993 loss:1.66499443

INFO:absl: 
		 NumberOfEpisodes = 22
		 EnvironmentSteps = 12001
		 AverageReturn = -573.0860595703125
		 AverageEpisodeLength = 830.2000122070312


12991 loss:1.2399062

INFO:absl: 
		 NumberOfEpisodes = 22
		 EnvironmentSteps = 13001
		 AverageReturn = -573.0860595703125
		 AverageEpisodeLength = 830.2000122070312


13957 loss:2.2498370

INFO:absl: 
		 NumberOfEpisodes = 22
		 EnvironmentSteps = 14001
		 AverageReturn = -573.0860595703125
		 AverageEpisodeLength = 830.2000122070312


14966 loss:1.25124112

INFO:absl: 
		 NumberOfEpisodes = 22
		 EnvironmentSteps = 15001
		 AverageReturn = -573.0860595703125
		 AverageEpisodeLength = 830.2000122070312


15964 loss:1.67482951

INFO:absl: 
		 NumberOfEpisodes = 22
		 EnvironmentSteps = 16001
		 AverageReturn = -573.0860595703125
		 AverageEpisodeLength = 830.2000122070312


16964 loss:1.6155430

INFO:absl: 
		 NumberOfEpisodes = 22
		 EnvironmentSteps = 17001
		 AverageReturn = -573.0860595703125
		 AverageEpisodeLength = 830.2000122070312


17995 loss:1.8144786

INFO:absl: 
		 NumberOfEpisodes = 22
		 EnvironmentSteps = 18001
		 AverageReturn = -573.0860595703125
		 AverageEpisodeLength = 830.2000122070312


18987 loss:1.7204925

INFO:absl: 
		 NumberOfEpisodes = 22
		 EnvironmentSteps = 19001
		 AverageReturn = -573.0860595703125
		 AverageEpisodeLength = 830.2000122070312


19985 loss:404.67340

INFO:absl: 
		 NumberOfEpisodes = 23
		 EnvironmentSteps = 20001
		 AverageReturn = -1028.8485107421875
		 AverageEpisodeLength = 1749.800048828125


20956 loss:2.0366809

INFO:absl: 
		 NumberOfEpisodes = 23
		 EnvironmentSteps = 21001
		 AverageReturn = -1028.8485107421875
		 AverageEpisodeLength = 1749.800048828125


21951 loss:0.7032065

INFO:absl: 
		 NumberOfEpisodes = 23
		 EnvironmentSteps = 22001
		 AverageReturn = -1028.8485107421875
		 AverageEpisodeLength = 1749.800048828125


22958 loss:2.3695931

INFO:absl: 
		 NumberOfEpisodes = 23
		 EnvironmentSteps = 23001
		 AverageReturn = -1028.8485107421875
		 AverageEpisodeLength = 1749.800048828125


23954 loss:1.0799346

INFO:absl: 
		 NumberOfEpisodes = 23
		 EnvironmentSteps = 24001
		 AverageReturn = -1028.8485107421875
		 AverageEpisodeLength = 1749.800048828125


24994 loss:1.6342176

INFO:absl: 
		 NumberOfEpisodes = 23
		 EnvironmentSteps = 25001
		 AverageReturn = -1028.8485107421875
		 AverageEpisodeLength = 1749.800048828125


25940 loss:0.4155584

INFO:absl: 
		 NumberOfEpisodes = 23
		 EnvironmentSteps = 26001
		 AverageReturn = -1028.8485107421875
		 AverageEpisodeLength = 1749.800048828125


26946 loss:0.6620359

INFO:absl: 
		 NumberOfEpisodes = 23
		 EnvironmentSteps = 27001
		 AverageReturn = -1028.8485107421875
		 AverageEpisodeLength = 1749.800048828125


27953 loss:0.86440648

INFO:absl: 
		 NumberOfEpisodes = 23
		 EnvironmentSteps = 28001
		 AverageReturn = -1028.8485107421875
		 AverageEpisodeLength = 1749.800048828125


28957 loss:433.553133

INFO:absl: 
		 NumberOfEpisodes = 23
		 EnvironmentSteps = 29001
		 AverageReturn = -1028.8485107421875
		 AverageEpisodeLength = 1749.800048828125


29993 loss:515.081058

INFO:absl: 
		 NumberOfEpisodes = 24
		 EnvironmentSteps = 30001
		 AverageReturn = -1438.0828857421875
		 AverageEpisodeLength = 2701.89990234375


30945 loss:0.8908746

INFO:absl: 
		 NumberOfEpisodes = 24
		 EnvironmentSteps = 31001
		 AverageReturn = -1438.0828857421875
		 AverageEpisodeLength = 2701.89990234375


31995 loss:1.3686087

INFO:absl: 
		 NumberOfEpisodes = 24
		 EnvironmentSteps = 32001
		 AverageReturn = -1438.0828857421875
		 AverageEpisodeLength = 2701.89990234375


32942 loss:0.66978938

INFO:absl: 
		 NumberOfEpisodes = 24
		 EnvironmentSteps = 33001
		 AverageReturn = -1438.0828857421875
		 AverageEpisodeLength = 2701.89990234375


33932 loss:0.70678932

INFO:absl: 
		 NumberOfEpisodes = 24
		 EnvironmentSteps = 34001
		 AverageReturn = -1438.0828857421875
		 AverageEpisodeLength = 2701.89990234375


34996 loss:1.1698151

INFO:absl: 
		 NumberOfEpisodes = 24
		 EnvironmentSteps = 35001
		 AverageReturn = -1438.0828857421875
		 AverageEpisodeLength = 2701.89990234375


35935 loss:2.7035723

INFO:absl: 
		 NumberOfEpisodes = 25
		 EnvironmentSteps = 36001
		 AverageReturn = -1620.996826171875
		 AverageEpisodeLength = 3196.199951171875


36974 loss:1.1121561

INFO:absl: 
		 NumberOfEpisodes = 25
		 EnvironmentSteps = 37001
		 AverageReturn = -1620.996826171875
		 AverageEpisodeLength = 3196.199951171875


37965 loss:0.4598198

INFO:absl: 
		 NumberOfEpisodes = 25
		 EnvironmentSteps = 38001
		 AverageReturn = -1620.996826171875
		 AverageEpisodeLength = 3196.199951171875


38954 loss:1.4210863

INFO:absl: 
		 NumberOfEpisodes = 25
		 EnvironmentSteps = 39001
		 AverageReturn = -1620.996826171875
		 AverageEpisodeLength = 3196.199951171875


39984 loss:0.6292238

INFO:absl: 
		 NumberOfEpisodes = 25
		 EnvironmentSteps = 40001
		 AverageReturn = -1620.996826171875
		 AverageEpisodeLength = 3196.199951171875


40940 loss:480.82532

INFO:absl: 
		 NumberOfEpisodes = 25
		 EnvironmentSteps = 41001
		 AverageReturn = -1620.996826171875
		 AverageEpisodeLength = 3196.199951171875


41958 loss:0.95092717

INFO:absl: 
		 NumberOfEpisodes = 25
		 EnvironmentSteps = 42001
		 AverageReturn = -1620.996826171875
		 AverageEpisodeLength = 3196.199951171875


42931 loss:0.9117303

INFO:absl: 
		 NumberOfEpisodes = 25
		 EnvironmentSteps = 43001
		 AverageReturn = -1620.996826171875
		 AverageEpisodeLength = 3196.199951171875


43930 loss:1.6917011

INFO:absl: 
		 NumberOfEpisodes = 25
		 EnvironmentSteps = 44001
		 AverageReturn = -1620.996826171875
		 AverageEpisodeLength = 3196.199951171875


44999 loss:0.9697949

INFO:absl: 
		 NumberOfEpisodes = 25
		 EnvironmentSteps = 45001
		 AverageReturn = -1620.996826171875
		 AverageEpisodeLength = 3196.199951171875


45995 loss:0.3787078

INFO:absl: 
		 NumberOfEpisodes = 26
		 EnvironmentSteps = 46001
		 AverageReturn = -2159.05615234375
		 AverageEpisodeLength = 4116.89990234375


46988 loss:0.7346474

INFO:absl: 
		 NumberOfEpisodes = 26
		 EnvironmentSteps = 47001
		 AverageReturn = -2159.05615234375
		 AverageEpisodeLength = 4116.89990234375


47976 loss:0.6548228

INFO:absl: 
		 NumberOfEpisodes = 26
		 EnvironmentSteps = 48001
		 AverageReturn = -2159.05615234375
		 AverageEpisodeLength = 4116.89990234375


48972 loss:0.7186048

INFO:absl: 
		 NumberOfEpisodes = 26
		 EnvironmentSteps = 49001
		 AverageReturn = -2159.05615234375
		 AverageEpisodeLength = 4116.89990234375


49968 loss:0.6495961

INFO:absl: 
		 NumberOfEpisodes = 26
		 EnvironmentSteps = 50001
		 AverageReturn = -2159.05615234375
		 AverageEpisodeLength = 4116.89990234375


50965 loss:0.4866646

INFO:absl: 
		 NumberOfEpisodes = 26
		 EnvironmentSteps = 51001
		 AverageReturn = -2159.05615234375
		 AverageEpisodeLength = 4116.89990234375


51962 loss:0.9192775

INFO:absl: 
		 NumberOfEpisodes = 26
		 EnvironmentSteps = 52001
		 AverageReturn = -2159.05615234375
		 AverageEpisodeLength = 4116.89990234375


52953 loss:0.7029379

INFO:absl: 
		 NumberOfEpisodes = 26
		 EnvironmentSteps = 53001
		 AverageReturn = -2159.05615234375
		 AverageEpisodeLength = 4116.89990234375


53988 loss:0.9342963

INFO:absl: 
		 NumberOfEpisodes = 26
		 EnvironmentSteps = 54001
		 AverageReturn = -2159.05615234375
		 AverageEpisodeLength = 4116.89990234375


54992 loss:1.1058969

INFO:absl: 
		 NumberOfEpisodes = 26
		 EnvironmentSteps = 55001
		 AverageReturn = -2159.05615234375
		 AverageEpisodeLength = 4116.89990234375


55990 loss:0.7334186

INFO:absl: 
		 NumberOfEpisodes = 27
		 EnvironmentSteps = 56001
		 AverageReturn = -2706.884521484375
		 AverageEpisodeLength = 5110.0


56989 loss:0.9055865

INFO:absl: 
		 NumberOfEpisodes = 27
		 EnvironmentSteps = 57001
		 AverageReturn = -2706.884521484375
		 AverageEpisodeLength = 5110.0


57985 loss:564.98029

INFO:absl: 
		 NumberOfEpisodes = 27
		 EnvironmentSteps = 58001
		 AverageReturn = -2706.884521484375
		 AverageEpisodeLength = 5110.0


58930 loss:0.6741008

INFO:absl: 
		 NumberOfEpisodes = 27
		 EnvironmentSteps = 59001
		 AverageReturn = -2706.884521484375
		 AverageEpisodeLength = 5110.0


59929 loss:1.7617428

INFO:absl: 
		 NumberOfEpisodes = 27
		 EnvironmentSteps = 60001
		 AverageReturn = -2706.884521484375
		 AverageEpisodeLength = 5110.0


60990 loss:0.9505498

INFO:absl: 
		 NumberOfEpisodes = 27
		 EnvironmentSteps = 61001
		 AverageReturn = -2706.884521484375
		 AverageEpisodeLength = 5110.0


61984 loss:0.7803331

INFO:absl: 
		 NumberOfEpisodes = 27
		 EnvironmentSteps = 62001
		 AverageReturn = -2706.884521484375
		 AverageEpisodeLength = 5110.0


62976 loss:1.3165368

INFO:absl: 
		 NumberOfEpisodes = 27
		 EnvironmentSteps = 63001
		 AverageReturn = -2706.884521484375
		 AverageEpisodeLength = 5110.0


63983 loss:1.1847758

INFO:absl: 
		 NumberOfEpisodes = 27
		 EnvironmentSteps = 64001
		 AverageReturn = -2706.884521484375
		 AverageEpisodeLength = 5110.0


64974 loss:0.5918767

INFO:absl: 
		 NumberOfEpisodes = 27
		 EnvironmentSteps = 65001
		 AverageReturn = -2706.884521484375
		 AverageEpisodeLength = 5110.0


65968 loss:0.9393341

INFO:absl: 
		 NumberOfEpisodes = 28
		 EnvironmentSteps = 66001
		 AverageReturn = -3119.59033203125
		 AverageEpisodeLength = 6106.2001953125


66934 loss:1.0328536

INFO:absl: 
		 NumberOfEpisodes = 28
		 EnvironmentSteps = 67001
		 AverageReturn = -3119.59033203125
		 AverageEpisodeLength = 6106.2001953125


67928 loss:1.6526662

INFO:absl: 
		 NumberOfEpisodes = 28
		 EnvironmentSteps = 68001
		 AverageReturn = -3119.59033203125
		 AverageEpisodeLength = 6106.2001953125


68997 loss:1.6283283

INFO:absl: 
		 NumberOfEpisodes = 28
		 EnvironmentSteps = 69001
		 AverageReturn = -3119.59033203125
		 AverageEpisodeLength = 6106.2001953125


69995 loss:1.4516708

INFO:absl: 
		 NumberOfEpisodes = 28
		 EnvironmentSteps = 70001
		 AverageReturn = -3119.59033203125
		 AverageEpisodeLength = 6106.2001953125


70989 loss:1.04959681

INFO:absl: 
		 NumberOfEpisodes = 28
		 EnvironmentSteps = 71001
		 AverageReturn = -3119.59033203125
		 AverageEpisodeLength = 6106.2001953125


71979 loss:1.6967168

INFO:absl: 
		 NumberOfEpisodes = 28
		 EnvironmentSteps = 72001
		 AverageReturn = -3119.59033203125
		 AverageEpisodeLength = 6106.2001953125


72997 loss:0.5661415

INFO:absl: 
		 NumberOfEpisodes = 28
		 EnvironmentSteps = 73001
		 AverageReturn = -3119.59033203125
		 AverageEpisodeLength = 6106.2001953125


73993 loss:0.7135796

INFO:absl: 
		 NumberOfEpisodes = 28
		 EnvironmentSteps = 74001
		 AverageReturn = -3119.59033203125
		 AverageEpisodeLength = 6106.2001953125


74957 loss:0.8378161

INFO:absl: 
		 NumberOfEpisodes = 28
		 EnvironmentSteps = 75001
		 AverageReturn = -3119.59033203125
		 AverageEpisodeLength = 6106.2001953125


75950 loss:1.2572421

INFO:absl: 
		 NumberOfEpisodes = 29
		 EnvironmentSteps = 76001
		 AverageReturn = -3387.024169921875
		 AverageEpisodeLength = 6788.7001953125


76940 loss:1.4525341

INFO:absl: 
		 NumberOfEpisodes = 29
		 EnvironmentSteps = 77001
		 AverageReturn = -3387.024169921875
		 AverageEpisodeLength = 6788.7001953125


77999 loss:1.2384204

INFO:absl: 
		 NumberOfEpisodes = 29
		 EnvironmentSteps = 78001
		 AverageReturn = -3387.024169921875
		 AverageEpisodeLength = 6788.7001953125


78996 loss:1.2843164

INFO:absl: 
		 NumberOfEpisodes = 29
		 EnvironmentSteps = 79001
		 AverageReturn = -3387.024169921875
		 AverageEpisodeLength = 6788.7001953125


79970 loss:1.0752117

INFO:absl: 
		 NumberOfEpisodes = 29
		 EnvironmentSteps = 80001
		 AverageReturn = -3387.024169921875
		 AverageEpisodeLength = 6788.7001953125


80966 loss:0.7724145

INFO:absl: 
		 NumberOfEpisodes = 29
		 EnvironmentSteps = 81001
		 AverageReturn = -3387.024169921875
		 AverageEpisodeLength = 6788.7001953125


81971 loss:392.49963

INFO:absl: 
		 NumberOfEpisodes = 29
		 EnvironmentSteps = 82001
		 AverageReturn = -3387.024169921875
		 AverageEpisodeLength = 6788.7001953125


82997 loss:1.4436431

INFO:absl: 
		 NumberOfEpisodes = 29
		 EnvironmentSteps = 83001
		 AverageReturn = -3387.024169921875
		 AverageEpisodeLength = 6788.7001953125


83999 loss:3.0258456

INFO:absl: 
		 NumberOfEpisodes = 29
		 EnvironmentSteps = 84001
		 AverageReturn = -3387.024169921875
		 AverageEpisodeLength = 6788.7001953125


84946 loss:0.9317018

INFO:absl: 
		 NumberOfEpisodes = 29
		 EnvironmentSteps = 85001
		 AverageReturn = -3387.024169921875
		 AverageEpisodeLength = 6788.7001953125


85940 loss:0.7364967

INFO:absl: 
		 NumberOfEpisodes = 30
		 EnvironmentSteps = 86001
		 AverageReturn = -3846.24755859375
		 AverageEpisodeLength = 7674.7998046875


86932 loss:1.2646019

INFO:absl: 
		 NumberOfEpisodes = 30
		 EnvironmentSteps = 87001
		 AverageReturn = -3846.24755859375
		 AverageEpisodeLength = 7674.7998046875


87971 loss:0.8374468

INFO:absl: 
		 NumberOfEpisodes = 30
		 EnvironmentSteps = 88001
		 AverageReturn = -3846.24755859375
		 AverageEpisodeLength = 7674.7998046875


88971 loss:1.7082777

INFO:absl: 
		 NumberOfEpisodes = 30
		 EnvironmentSteps = 89001
		 AverageReturn = -3846.24755859375
		 AverageEpisodeLength = 7674.7998046875


89976 loss:0.8406475

INFO:absl: 
		 NumberOfEpisodes = 30
		 EnvironmentSteps = 90001
		 AverageReturn = -3846.24755859375
		 AverageEpisodeLength = 7674.7998046875


90991 loss:1.0517857

INFO:absl: 
		 NumberOfEpisodes = 30
		 EnvironmentSteps = 91001
		 AverageReturn = -3846.24755859375
		 AverageEpisodeLength = 7674.7998046875


91987 loss:1.0083043

INFO:absl: 
		 NumberOfEpisodes = 30
		 EnvironmentSteps = 92001
		 AverageReturn = -3846.24755859375
		 AverageEpisodeLength = 7674.7998046875


92979 loss:0.7331254

INFO:absl: 
		 NumberOfEpisodes = 30
		 EnvironmentSteps = 93001
		 AverageReturn = -3846.24755859375
		 AverageEpisodeLength = 7674.7998046875


93974 loss:1.4053258

INFO:absl: 
		 NumberOfEpisodes = 30
		 EnvironmentSteps = 94001
		 AverageReturn = -3846.24755859375
		 AverageEpisodeLength = 7674.7998046875


94981 loss:1.1599910

INFO:absl: 
		 NumberOfEpisodes = 30
		 EnvironmentSteps = 95001
		 AverageReturn = -3846.24755859375
		 AverageEpisodeLength = 7674.7998046875


95983 loss:0.7701387

INFO:absl: 
		 NumberOfEpisodes = 31
		 EnvironmentSteps = 96001
		 AverageReturn = -4119.82421875
		 AverageEpisodeLength = 8581.2001953125


96943 loss:0.5249231

INFO:absl: 
		 NumberOfEpisodes = 31
		 EnvironmentSteps = 97001
		 AverageReturn = -4119.82421875
		 AverageEpisodeLength = 8581.2001953125


97985 loss:1.8451953

INFO:absl: 
		 NumberOfEpisodes = 31
		 EnvironmentSteps = 98001
		 AverageReturn = -4119.82421875
		 AverageEpisodeLength = 8581.2001953125


98978 loss:0.9714877

INFO:absl: 
		 NumberOfEpisodes = 31
		 EnvironmentSteps = 99001
		 AverageReturn = -4119.82421875
		 AverageEpisodeLength = 8581.2001953125


99975 loss:1.2755450

INFO:absl: 
		 NumberOfEpisodes = 31
		 EnvironmentSteps = 100001
		 AverageReturn = -4119.82421875
		 AverageEpisodeLength = 8581.2001953125


100957 loss:0.5396248

INFO:absl: 
		 NumberOfEpisodes = 31
		 EnvironmentSteps = 101001
		 AverageReturn = -4119.82421875
		 AverageEpisodeLength = 8581.2001953125


101949 loss:0.8220236

INFO:absl: 
		 NumberOfEpisodes = 31
		 EnvironmentSteps = 102001
		 AverageReturn = -4119.82421875
		 AverageEpisodeLength = 8581.2001953125


102976 loss:1.3096946

INFO:absl: 
		 NumberOfEpisodes = 31
		 EnvironmentSteps = 103001
		 AverageReturn = -4119.82421875
		 AverageEpisodeLength = 8581.2001953125


103967 loss:0.75077

INFO:absl: 
		 NumberOfEpisodes = 31
		 EnvironmentSteps = 104001
		 AverageReturn = -4119.82421875
		 AverageEpisodeLength = 8581.2001953125


104960 loss:0.7364828

INFO:absl: 
		 NumberOfEpisodes = 31
		 EnvironmentSteps = 105001
		 AverageReturn = -4119.82421875
		 AverageEpisodeLength = 8581.2001953125


105974 loss:0.8307693

INFO:absl: 
		 NumberOfEpisodes = 32
		 EnvironmentSteps = 106001
		 AverageReturn = -4536.51513671875
		 AverageEpisodeLength = 9581.099609375


106931 loss:0.8189455

INFO:absl: 
		 NumberOfEpisodes = 32
		 EnvironmentSteps = 107001
		 AverageReturn = -4536.51513671875
		 AverageEpisodeLength = 9581.099609375


107932 loss:0.7728797

INFO:absl: 
		 NumberOfEpisodes = 32
		 EnvironmentSteps = 108001
		 AverageReturn = -4536.51513671875
		 AverageEpisodeLength = 9581.099609375


108938 loss:0.6771025

INFO:absl: 
		 NumberOfEpisodes = 32
		 EnvironmentSteps = 109001
		 AverageReturn = -4536.51513671875
		 AverageEpisodeLength = 9581.099609375


109937 loss:1.4948830

INFO:absl: 
		 NumberOfEpisodes = 32
		 EnvironmentSteps = 110001
		 AverageReturn = -4536.51513671875
		 AverageEpisodeLength = 9581.099609375


110935 loss:0.65317

INFO:absl: 
		 NumberOfEpisodes = 32
		 EnvironmentSteps = 111001
		 AverageReturn = -4536.51513671875
		 AverageEpisodeLength = 9581.099609375


111959 loss:0.8502100

INFO:absl: 
		 NumberOfEpisodes = 32
		 EnvironmentSteps = 112001
		 AverageReturn = -4536.51513671875
		 AverageEpisodeLength = 9581.099609375


112953 loss:1.03005

INFO:absl: 
		 NumberOfEpisodes = 32
		 EnvironmentSteps = 113001
		 AverageReturn = -4536.51513671875
		 AverageEpisodeLength = 9581.099609375


113976 loss:0.6092170

INFO:absl: 
		 NumberOfEpisodes = 32
		 EnvironmentSteps = 114001
		 AverageReturn = -4536.51513671875
		 AverageEpisodeLength = 9581.099609375


114998 loss:1.5196219

INFO:absl: 
		 NumberOfEpisodes = 32
		 EnvironmentSteps = 115001
		 AverageReturn = -4536.51513671875
		 AverageEpisodeLength = 9581.099609375


115936 loss:0.92052

INFO:absl: 
		 NumberOfEpisodes = 33
		 EnvironmentSteps = 116001
		 AverageReturn = -4592.1943359375
		 AverageEpisodeLength = 9581.099609375


117000 loss:1.0542968

INFO:absl: 
		 NumberOfEpisodes = 33
		 EnvironmentSteps = 117001
		 AverageReturn = -4592.1943359375
		 AverageEpisodeLength = 9581.099609375


117961 loss:1.2209520

INFO:absl: 
		 NumberOfEpisodes = 33
		 EnvironmentSteps = 118001
		 AverageReturn = -4592.1943359375
		 AverageEpisodeLength = 9581.099609375


118967 loss:1.59254

INFO:absl: 
		 NumberOfEpisodes = 33
		 EnvironmentSteps = 119001
		 AverageReturn = -4592.1943359375
		 AverageEpisodeLength = 9581.099609375


119970 loss:0.7010433

INFO:absl: 
		 NumberOfEpisodes = 33
		 EnvironmentSteps = 120001
		 AverageReturn = -4592.1943359375
		 AverageEpisodeLength = 9581.099609375


120961 loss:0.89581

INFO:absl: 
		 NumberOfEpisodes = 33
		 EnvironmentSteps = 121001
		 AverageReturn = -4592.1943359375
		 AverageEpisodeLength = 9581.099609375


121993 loss:0.75536

INFO:absl: 
		 NumberOfEpisodes = 33
		 EnvironmentSteps = 122001
		 AverageReturn = -4592.1943359375
		 AverageEpisodeLength = 9581.099609375


122980 loss:0.7536642

INFO:absl: 
		 NumberOfEpisodes = 33
		 EnvironmentSteps = 123001
		 AverageReturn = -4592.1943359375
		 AverageEpisodeLength = 9581.099609375


123990 loss:1.4969353

INFO:absl: 
		 NumberOfEpisodes = 33
		 EnvironmentSteps = 124001
		 AverageReturn = -4592.1943359375
		 AverageEpisodeLength = 9581.099609375


124995 loss:1.21208

INFO:absl: 
		 NumberOfEpisodes = 33
		 EnvironmentSteps = 125001
		 AverageReturn = -4592.1943359375
		 AverageEpisodeLength = 9581.099609375


125935 loss:1.7516838

INFO:absl: 
		 NumberOfEpisodes = 34
		 EnvironmentSteps = 126001
		 AverageReturn = -4426.51171875
		 AverageEpisodeLength = 9581.099609375


126941 loss:1.07986

INFO:absl: 
		 NumberOfEpisodes = 34
		 EnvironmentSteps = 127001
		 AverageReturn = -4426.51171875
		 AverageEpisodeLength = 9581.099609375


127946 loss:0.6590937

INFO:absl: 
		 NumberOfEpisodes = 34
		 EnvironmentSteps = 128001
		 AverageReturn = -4426.51171875
		 AverageEpisodeLength = 9581.099609375


128931 loss:1.1019994

INFO:absl: 
		 NumberOfEpisodes = 34
		 EnvironmentSteps = 129001
		 AverageReturn = -4426.51171875
		 AverageEpisodeLength = 9581.099609375


129940 loss:1.23105

INFO:absl: 
		 NumberOfEpisodes = 34
		 EnvironmentSteps = 130001
		 AverageReturn = -4426.51171875
		 AverageEpisodeLength = 9581.099609375


130949 loss:1.25913

INFO:absl: 
		 NumberOfEpisodes = 34
		 EnvironmentSteps = 131001
		 AverageReturn = -4426.51171875
		 AverageEpisodeLength = 9581.099609375


131942 loss:1.10658

INFO:absl: 
		 NumberOfEpisodes = 34
		 EnvironmentSteps = 132001
		 AverageReturn = -4426.51171875
		 AverageEpisodeLength = 9581.099609375


132944 loss:1.42572

INFO:absl: 
		 NumberOfEpisodes = 34
		 EnvironmentSteps = 133001
		 AverageReturn = -4426.51171875
		 AverageEpisodeLength = 9581.099609375


133935 loss:1.3956626

INFO:absl: 
		 NumberOfEpisodes = 34
		 EnvironmentSteps = 134001
		 AverageReturn = -4426.51171875
		 AverageEpisodeLength = 9581.099609375


134931 loss:0.9199749

INFO:absl: 
		 NumberOfEpisodes = 34
		 EnvironmentSteps = 135001
		 AverageReturn = -4426.51171875
		 AverageEpisodeLength = 9581.099609375


135949 loss:0.86726

INFO:absl: 
		 NumberOfEpisodes = 35
		 EnvironmentSteps = 136001
		 AverageReturn = -4562.82958984375
		 AverageEpisodeLength = 10000.0


136960 loss:2.07502

INFO:absl: 
		 NumberOfEpisodes = 35
		 EnvironmentSteps = 137001
		 AverageReturn = -4562.82958984375
		 AverageEpisodeLength = 10000.0


137959 loss:0.80370

INFO:absl: 
		 NumberOfEpisodes = 35
		 EnvironmentSteps = 138001
		 AverageReturn = -4562.82958984375
		 AverageEpisodeLength = 10000.0


138988 loss:0.95095

INFO:absl: 
		 NumberOfEpisodes = 35
		 EnvironmentSteps = 139001
		 AverageReturn = -4562.82958984375
		 AverageEpisodeLength = 10000.0


139964 loss:1.30122

INFO:absl: 
		 NumberOfEpisodes = 35
		 EnvironmentSteps = 140001
		 AverageReturn = -4562.82958984375
		 AverageEpisodeLength = 10000.0


140975 loss:1.08316

INFO:absl: 
		 NumberOfEpisodes = 35
		 EnvironmentSteps = 141001
		 AverageReturn = -4562.82958984375
		 AverageEpisodeLength = 10000.0


141972 loss:0.72172

INFO:absl: 
		 NumberOfEpisodes = 35
		 EnvironmentSteps = 142001
		 AverageReturn = -4562.82958984375
		 AverageEpisodeLength = 10000.0


142983 loss:0.91731

INFO:absl: 
		 NumberOfEpisodes = 35
		 EnvironmentSteps = 143001
		 AverageReturn = -4562.82958984375
		 AverageEpisodeLength = 10000.0


143978 loss:0.88753

INFO:absl: 
		 NumberOfEpisodes = 35
		 EnvironmentSteps = 144001
		 AverageReturn = -4562.82958984375
		 AverageEpisodeLength = 10000.0


144987 loss:0.66531

INFO:absl: 
		 NumberOfEpisodes = 35
		 EnvironmentSteps = 145001
		 AverageReturn = -4562.82958984375
		 AverageEpisodeLength = 10000.0


145980 loss:1.75356

INFO:absl: 
		 NumberOfEpisodes = 36
		 EnvironmentSteps = 146001
		 AverageReturn = -4590.99365234375
		 AverageEpisodeLength = 10000.0


146937 loss:0.76082

INFO:absl: 
		 NumberOfEpisodes = 36
		 EnvironmentSteps = 147001
		 AverageReturn = -4590.99365234375
		 AverageEpisodeLength = 10000.0


147930 loss:0.58710

INFO:absl: 
		 NumberOfEpisodes = 36
		 EnvironmentSteps = 148001
		 AverageReturn = -4590.99365234375
		 AverageEpisodeLength = 10000.0


148932 loss:0.92833

INFO:absl: 
		 NumberOfEpisodes = 36
		 EnvironmentSteps = 149001
		 AverageReturn = -4590.99365234375
		 AverageEpisodeLength = 10000.0


149970 loss:0.80071

INFO:absl: 
		 NumberOfEpisodes = 36
		 EnvironmentSteps = 150001
		 AverageReturn = -4590.99365234375
		 AverageEpisodeLength = 10000.0


150994 loss:1.84405

INFO:absl: 
		 NumberOfEpisodes = 36
		 EnvironmentSteps = 151001
		 AverageReturn = -4590.99365234375
		 AverageEpisodeLength = 10000.0


151942 loss:1.44867

INFO:absl: 
		 NumberOfEpisodes = 36
		 EnvironmentSteps = 152001
		 AverageReturn = -4590.99365234375
		 AverageEpisodeLength = 10000.0


152937 loss:0.59064

INFO:absl: 
		 NumberOfEpisodes = 36
		 EnvironmentSteps = 153001
		 AverageReturn = -4590.99365234375
		 AverageEpisodeLength = 10000.0


153954 loss:1.35614

INFO:absl: 
		 NumberOfEpisodes = 36
		 EnvironmentSteps = 154001
		 AverageReturn = -4590.99365234375
		 AverageEpisodeLength = 10000.0


154963 loss:1.75756

INFO:absl: 
		 NumberOfEpisodes = 36
		 EnvironmentSteps = 155001
		 AverageReturn = -4590.99365234375
		 AverageEpisodeLength = 10000.0


155952 loss:1.25444

INFO:absl: 
		 NumberOfEpisodes = 37
		 EnvironmentSteps = 156001
		 AverageReturn = -4430.27734375
		 AverageEpisodeLength = 10000.0


156945 loss:1.07887

INFO:absl: 
		 NumberOfEpisodes = 37
		 EnvironmentSteps = 157001
		 AverageReturn = -4430.27734375
		 AverageEpisodeLength = 10000.0


157942 loss:0.88443

INFO:absl: 
		 NumberOfEpisodes = 37
		 EnvironmentSteps = 158001
		 AverageReturn = -4430.27734375
		 AverageEpisodeLength = 10000.0


158933 loss:1.91915

INFO:absl: 
		 NumberOfEpisodes = 37
		 EnvironmentSteps = 159001
		 AverageReturn = -4430.27734375
		 AverageEpisodeLength = 10000.0


159998 loss:2.15785

INFO:absl: 
		 NumberOfEpisodes = 37
		 EnvironmentSteps = 160001
		 AverageReturn = -4430.27734375
		 AverageEpisodeLength = 10000.0


160968 loss:0.85281

INFO:absl: 
		 NumberOfEpisodes = 37
		 EnvironmentSteps = 161001
		 AverageReturn = -4430.27734375
		 AverageEpisodeLength = 10000.0


161984 loss:0.62214

INFO:absl: 
		 NumberOfEpisodes = 37
		 EnvironmentSteps = 162001
		 AverageReturn = -4430.27734375
		 AverageEpisodeLength = 10000.0


162982 loss:1.28084

INFO:absl: 
		 NumberOfEpisodes = 37
		 EnvironmentSteps = 163001
		 AverageReturn = -4430.27734375
		 AverageEpisodeLength = 10000.0


163974 loss:1.12717

INFO:absl: 
		 NumberOfEpisodes = 37
		 EnvironmentSteps = 164001
		 AverageReturn = -4430.27734375
		 AverageEpisodeLength = 10000.0


164985 loss:1.10826

INFO:absl: 
		 NumberOfEpisodes = 37
		 EnvironmentSteps = 165001
		 AverageReturn = -4430.27734375
		 AverageEpisodeLength = 10000.0


165929 loss:1.19142

INFO:absl: 
		 NumberOfEpisodes = 38
		 EnvironmentSteps = 166001
		 AverageReturn = -4521.3076171875
		 AverageEpisodeLength = 10000.0


166946 loss:1.53927

INFO:absl: 
		 NumberOfEpisodes = 38
		 EnvironmentSteps = 167001
		 AverageReturn = -4521.3076171875
		 AverageEpisodeLength = 10000.0


167966 loss:1.45632

INFO:absl: 
		 NumberOfEpisodes = 38
		 EnvironmentSteps = 168001
		 AverageReturn = -4521.3076171875
		 AverageEpisodeLength = 10000.0


168981 loss:1.00942

INFO:absl: 
		 NumberOfEpisodes = 38
		 EnvironmentSteps = 169001
		 AverageReturn = -4521.3076171875
		 AverageEpisodeLength = 10000.0


169974 loss:0.78513

INFO:absl: 
		 NumberOfEpisodes = 38
		 EnvironmentSteps = 170001
		 AverageReturn = -4521.3076171875
		 AverageEpisodeLength = 10000.0


170966 loss:1.42341

INFO:absl: 
		 NumberOfEpisodes = 38
		 EnvironmentSteps = 171001
		 AverageReturn = -4521.3076171875
		 AverageEpisodeLength = 10000.0


171931 loss:2.22758

INFO:absl: 
		 NumberOfEpisodes = 38
		 EnvironmentSteps = 172001
		 AverageReturn = -4521.3076171875
		 AverageEpisodeLength = 10000.0


172992 loss:0.77635

INFO:absl: 
		 NumberOfEpisodes = 38
		 EnvironmentSteps = 173001
		 AverageReturn = -4521.3076171875
		 AverageEpisodeLength = 10000.0


173988 loss:1.19931

INFO:absl: 
		 NumberOfEpisodes = 38
		 EnvironmentSteps = 174001
		 AverageReturn = -4521.3076171875
		 AverageEpisodeLength = 10000.0


174979 loss:1.52928

INFO:absl: 
		 NumberOfEpisodes = 38
		 EnvironmentSteps = 175001
		 AverageReturn = -4521.3076171875
		 AverageEpisodeLength = 10000.0


175973 loss:1.63881

INFO:absl: 
		 NumberOfEpisodes = 40
		 EnvironmentSteps = 176001
		 AverageReturn = -3902.135498046875
		 AverageEpisodeLength = 9000.099609375


176970 loss:0.4065323

INFO:absl: 
		 NumberOfEpisodes = 40
		 EnvironmentSteps = 177001
		 AverageReturn = -3902.135498046875
		 AverageEpisodeLength = 9000.099609375


177963 loss:1.37577

INFO:absl: 
		 NumberOfEpisodes = 40
		 EnvironmentSteps = 178001
		 AverageReturn = -3902.135498046875
		 AverageEpisodeLength = 9000.099609375


178962 loss:1.12823

INFO:absl: 
		 NumberOfEpisodes = 40
		 EnvironmentSteps = 179001
		 AverageReturn = -3902.135498046875
		 AverageEpisodeLength = 9000.099609375


179948 loss:0.9095567

INFO:absl: 
		 NumberOfEpisodes = 40
		 EnvironmentSteps = 180001
		 AverageReturn = -3902.135498046875
		 AverageEpisodeLength = 9000.099609375


180961 loss:1.6503143

INFO:absl: 
		 NumberOfEpisodes = 40
		 EnvironmentSteps = 181001
		 AverageReturn = -3902.135498046875
		 AverageEpisodeLength = 9000.099609375


181952 loss:1.4289578

INFO:absl: 
		 NumberOfEpisodes = 40
		 EnvironmentSteps = 182001
		 AverageReturn = -3902.135498046875
		 AverageEpisodeLength = 9000.099609375


182947 loss:0.50346

INFO:absl: 
		 NumberOfEpisodes = 40
		 EnvironmentSteps = 183001
		 AverageReturn = -3902.135498046875
		 AverageEpisodeLength = 9000.099609375


183934 loss:0.77750

INFO:absl: 
		 NumberOfEpisodes = 41
		 EnvironmentSteps = 184001
		 AverageReturn = -3935.274658203125
		 AverageEpisodeLength = 8768.400390625


184931 loss:1.1868428

INFO:absl: 
		 NumberOfEpisodes = 41
		 EnvironmentSteps = 185001
		 AverageReturn = -3935.274658203125
		 AverageEpisodeLength = 8768.400390625


185938 loss:0.8169365

INFO:absl: 
		 NumberOfEpisodes = 41
		 EnvironmentSteps = 186001
		 AverageReturn = -3935.274658203125
		 AverageEpisodeLength = 8768.400390625


186941 loss:1.20904

INFO:absl: 
		 NumberOfEpisodes = 41
		 EnvironmentSteps = 187001
		 AverageReturn = -3935.274658203125
		 AverageEpisodeLength = 8768.400390625


187937 loss:0.71881

INFO:absl: 
		 NumberOfEpisodes = 41
		 EnvironmentSteps = 188001
		 AverageReturn = -3935.274658203125
		 AverageEpisodeLength = 8768.400390625


188999 loss:1.0882702

INFO:absl: 
		 NumberOfEpisodes = 41
		 EnvironmentSteps = 189001
		 AverageReturn = -3935.274658203125
		 AverageEpisodeLength = 8768.400390625


189933 loss:0.71587

INFO:absl: 
		 NumberOfEpisodes = 41
		 EnvironmentSteps = 190001
		 AverageReturn = -3935.274658203125
		 AverageEpisodeLength = 8768.400390625


190949 loss:0.9610404

INFO:absl: 
		 NumberOfEpisodes = 41
		 EnvironmentSteps = 191001
		 AverageReturn = -3935.274658203125
		 AverageEpisodeLength = 8768.400390625


191945 loss:0.68879

INFO:absl: 
		 NumberOfEpisodes = 41
		 EnvironmentSteps = 192001
		 AverageReturn = -3935.274658203125
		 AverageEpisodeLength = 8768.400390625


192964 loss:1.2076473

INFO:absl: 
		 NumberOfEpisodes = 41
		 EnvironmentSteps = 193001
		 AverageReturn = -3935.274658203125
		 AverageEpisodeLength = 8768.400390625


193986 loss:0.71020

INFO:absl: 
		 NumberOfEpisodes = 42
		 EnvironmentSteps = 194001
		 AverageReturn = -3994.44580078125
		 AverageEpisodeLength = 8768.400390625


194967 loss:1.06024

INFO:absl: 
		 NumberOfEpisodes = 42
		 EnvironmentSteps = 195001
		 AverageReturn = -3994.44580078125
		 AverageEpisodeLength = 8768.400390625


195964 loss:0.8521907

INFO:absl: 
		 NumberOfEpisodes = 42
		 EnvironmentSteps = 196001
		 AverageReturn = -3994.44580078125
		 AverageEpisodeLength = 8768.400390625


196963 loss:1.0142088

INFO:absl: 
		 NumberOfEpisodes = 43
		 EnvironmentSteps = 197001
		 AverageReturn = -3569.961669921875
		 AverageEpisodeLength = 8061.0


197960 loss:1.1424459

INFO:absl: 
		 NumberOfEpisodes = 43
		 EnvironmentSteps = 198001
		 AverageReturn = -3569.961669921875
		 AverageEpisodeLength = 8061.0


198949 loss:0.5962499

INFO:absl: 
		 NumberOfEpisodes = 43
		 EnvironmentSteps = 199001
		 AverageReturn = -3569.961669921875
		 AverageEpisodeLength = 8061.0


199949 loss:0.9390481

INFO:absl: 
		 NumberOfEpisodes = 44
		 EnvironmentSteps = 200001
		 AverageReturn = -3388.28271484375
		 AverageEpisodeLength = 7451.89990234375


200966 loss:1.8327888

INFO:absl: 
		 NumberOfEpisodes = 44
		 EnvironmentSteps = 201001
		 AverageReturn = -3388.28271484375
		 AverageEpisodeLength = 7451.89990234375


201984 loss:0.4725509

INFO:absl: 
		 NumberOfEpisodes = 44
		 EnvironmentSteps = 202001
		 AverageReturn = -3388.28271484375
		 AverageEpisodeLength = 7451.89990234375


202989 loss:1.1211082

INFO:absl: 
		 NumberOfEpisodes = 45
		 EnvironmentSteps = 203001
		 AverageReturn = -3122.70703125
		 AverageEpisodeLength = 6682.7998046875


203955 loss:1.6022043

INFO:absl: 
		 NumberOfEpisodes = 45
		 EnvironmentSteps = 204001
		 AverageReturn = -3122.70703125
		 AverageEpisodeLength = 6682.7998046875


204955 loss:0.6290541

INFO:absl: 
		 NumberOfEpisodes = 45
		 EnvironmentSteps = 205001
		 AverageReturn = -3122.70703125
		 AverageEpisodeLength = 6682.7998046875


205954 loss:1.4155346

INFO:absl: 
		 NumberOfEpisodes = 45
		 EnvironmentSteps = 206001
		 AverageReturn = -3122.70703125
		 AverageEpisodeLength = 6682.7998046875


206943 loss:1.7110405

INFO:absl: 
		 NumberOfEpisodes = 47
		 EnvironmentSteps = 207001
		 AverageReturn = -2274.476806640625
		 AverageEpisodeLength = 5099.60009765625


207968 loss:1.5042536

INFO:absl: 
		 NumberOfEpisodes = 47
		 EnvironmentSteps = 208001
		 AverageReturn = -2274.476806640625
		 AverageEpisodeLength = 5099.60009765625


208958 loss:1.4281056

INFO:absl: 
		 NumberOfEpisodes = 47
		 EnvironmentSteps = 209001
		 AverageReturn = -2274.476806640625
		 AverageEpisodeLength = 5099.60009765625


209979 loss:0.8743721

INFO:absl: 
		 NumberOfEpisodes = 47
		 EnvironmentSteps = 210001
		 AverageReturn = -2274.476806640625
		 AverageEpisodeLength = 5099.60009765625


210971 loss:1.3338167

INFO:absl: 
		 NumberOfEpisodes = 47
		 EnvironmentSteps = 211001
		 AverageReturn = -2274.476806640625
		 AverageEpisodeLength = 5099.60009765625


211960 loss:0.7466225

INFO:absl: 
		 NumberOfEpisodes = 47
		 EnvironmentSteps = 212001
		 AverageReturn = -2274.476806640625
		 AverageEpisodeLength = 5099.60009765625


212955 loss:0.5920728

INFO:absl: 
		 NumberOfEpisodes = 47
		 EnvironmentSteps = 213001
		 AverageReturn = -2274.476806640625
		 AverageEpisodeLength = 5099.60009765625


213945 loss:2.1583269

INFO:absl: 
		 NumberOfEpisodes = 47
		 EnvironmentSteps = 214001
		 AverageReturn = -2274.476806640625
		 AverageEpisodeLength = 5099.60009765625


214943 loss:0.9622119

INFO:absl: 
		 NumberOfEpisodes = 48
		 EnvironmentSteps = 215001
		 AverageReturn = -2082.13818359375
		 AverageEpisodeLength = 4942.10009765625


215931 loss:0.6896703

INFO:absl: 
		 NumberOfEpisodes = 48
		 EnvironmentSteps = 216001
		 AverageReturn = -2082.13818359375
		 AverageEpisodeLength = 4942.10009765625


216948 loss:1.2331377

INFO:absl: 
		 NumberOfEpisodes = 48
		 EnvironmentSteps = 217001
		 AverageReturn = -2082.13818359375
		 AverageEpisodeLength = 4942.10009765625


217948 loss:0.7063786

INFO:absl: 
		 NumberOfEpisodes = 48
		 EnvironmentSteps = 218001
		 AverageReturn = -2082.13818359375
		 AverageEpisodeLength = 4942.10009765625


218939 loss:0.6131522

INFO:absl: 
		 NumberOfEpisodes = 48
		 EnvironmentSteps = 219001
		 AverageReturn = -2082.13818359375
		 AverageEpisodeLength = 4942.10009765625


219938 loss:1.3146611

INFO:absl: 
		 NumberOfEpisodes = 48
		 EnvironmentSteps = 220001
		 AverageReturn = -2082.13818359375
		 AverageEpisodeLength = 4942.10009765625


220938 loss:1.0939101

INFO:absl: 
		 NumberOfEpisodes = 48
		 EnvironmentSteps = 221001
		 AverageReturn = -2082.13818359375
		 AverageEpisodeLength = 4942.10009765625


221936 loss:0.4370817

INFO:absl: 
		 NumberOfEpisodes = 48
		 EnvironmentSteps = 222001
		 AverageReturn = -2082.13818359375
		 AverageEpisodeLength = 4942.10009765625


222934 loss:1.0162610

INFO:absl: 
		 NumberOfEpisodes = 48
		 EnvironmentSteps = 223001
		 AverageReturn = -2082.13818359375
		 AverageEpisodeLength = 4942.10009765625


223999 loss:1.9068104

INFO:absl: 
		 NumberOfEpisodes = 48
		 EnvironmentSteps = 224001
		 AverageReturn = -2082.13818359375
		 AverageEpisodeLength = 4942.10009765625


224933 loss:0.8110202

INFO:absl: 
		 NumberOfEpisodes = 49
		 EnvironmentSteps = 225001
		 AverageReturn = -2016.425537109375
		 AverageEpisodeLength = 4942.10009765625


225968 loss:1.0845959

INFO:absl: 
		 NumberOfEpisodes = 49
		 EnvironmentSteps = 226001
		 AverageReturn = -2016.425537109375
		 AverageEpisodeLength = 4942.10009765625


226960 loss:1.2065789

INFO:absl: 
		 NumberOfEpisodes = 49
		 EnvironmentSteps = 227001
		 AverageReturn = -2016.425537109375
		 AverageEpisodeLength = 4942.10009765625


227957 loss:1.1379462

INFO:absl: 
		 NumberOfEpisodes = 49
		 EnvironmentSteps = 228001
		 AverageReturn = -2016.425537109375
		 AverageEpisodeLength = 4942.10009765625


228952 loss:1.4442048

INFO:absl: 
		 NumberOfEpisodes = 49
		 EnvironmentSteps = 229001
		 AverageReturn = -2016.425537109375
		 AverageEpisodeLength = 4942.10009765625


229966 loss:1.3911587

INFO:absl: 
		 NumberOfEpisodes = 49
		 EnvironmentSteps = 230001
		 AverageReturn = -2016.425537109375
		 AverageEpisodeLength = 4942.10009765625


230969 loss:0.8883779

INFO:absl: 
		 NumberOfEpisodes = 49
		 EnvironmentSteps = 231001
		 AverageReturn = -2016.425537109375
		 AverageEpisodeLength = 4942.10009765625


231974 loss:1.2338536

INFO:absl: 
		 NumberOfEpisodes = 49
		 EnvironmentSteps = 232001
		 AverageReturn = -2016.425537109375
		 AverageEpisodeLength = 4942.10009765625


232983 loss:0.7925969

INFO:absl: 
		 NumberOfEpisodes = 49
		 EnvironmentSteps = 233001
		 AverageReturn = -2016.425537109375
		 AverageEpisodeLength = 4942.10009765625


233930 loss:0.8908470

INFO:absl: 
		 NumberOfEpisodes = 49
		 EnvironmentSteps = 234001
		 AverageReturn = -2016.425537109375
		 AverageEpisodeLength = 4942.10009765625


234934 loss:1.4255905

INFO:absl: 
		 NumberOfEpisodes = 50
		 EnvironmentSteps = 235001
		 AverageReturn = -2306.173828125
		 AverageEpisodeLength = 5942.0


235996 loss:0.9449250

INFO:absl: 
		 NumberOfEpisodes = 50
		 EnvironmentSteps = 236001
		 AverageReturn = -2306.173828125
		 AverageEpisodeLength = 5942.0


236994 loss:1.9565479

INFO:absl: 
		 NumberOfEpisodes = 50
		 EnvironmentSteps = 237001
		 AverageReturn = -2306.173828125
		 AverageEpisodeLength = 5942.0


237982 loss:2.1861203

INFO:absl: 
		 NumberOfEpisodes = 50
		 EnvironmentSteps = 238001
		 AverageReturn = -2306.173828125
		 AverageEpisodeLength = 5942.0


238978 loss:1.6998276

INFO:absl: 
		 NumberOfEpisodes = 50
		 EnvironmentSteps = 239001
		 AverageReturn = -2306.173828125
		 AverageEpisodeLength = 5942.0


239974 loss:0.6555158

INFO:absl: 
		 NumberOfEpisodes = 50
		 EnvironmentSteps = 240001
		 AverageReturn = -2306.173828125
		 AverageEpisodeLength = 5942.0


240985 loss:1.2825314

INFO:absl: 
		 NumberOfEpisodes = 50
		 EnvironmentSteps = 241001
		 AverageReturn = -2306.173828125
		 AverageEpisodeLength = 5942.0


241983 loss:1.1318680

INFO:absl: 
		 NumberOfEpisodes = 50
		 EnvironmentSteps = 242001
		 AverageReturn = -2306.173828125
		 AverageEpisodeLength = 5942.0


242985 loss:1.7872397

INFO:absl: 
		 NumberOfEpisodes = 50
		 EnvironmentSteps = 243001
		 AverageReturn = -2306.173828125
		 AverageEpisodeLength = 5942.0


243996 loss:1.4031879

INFO:absl: 
		 NumberOfEpisodes = 50
		 EnvironmentSteps = 244001
		 AverageReturn = -2306.173828125
		 AverageEpisodeLength = 5942.0


244988 loss:2.3170978

INFO:absl: 
		 NumberOfEpisodes = 51
		 EnvironmentSteps = 245001
		 AverageReturn = -2356.9970703125
		 AverageEpisodeLength = 6173.7001953125


245998 loss:1.6056806

INFO:absl: 
		 NumberOfEpisodes = 51
		 EnvironmentSteps = 246001
		 AverageReturn = -2356.9970703125
		 AverageEpisodeLength = 6173.7001953125


246963 loss:0.9270700

INFO:absl: 
		 NumberOfEpisodes = 51
		 EnvironmentSteps = 247001
		 AverageReturn = -2356.9970703125
		 AverageEpisodeLength = 6173.7001953125


247947 loss:1.5287018

INFO:absl: 
		 NumberOfEpisodes = 51
		 EnvironmentSteps = 248001
		 AverageReturn = -2356.9970703125
		 AverageEpisodeLength = 6173.7001953125


248966 loss:0.8663877

INFO:absl: 
		 NumberOfEpisodes = 51
		 EnvironmentSteps = 249001
		 AverageReturn = -2356.9970703125
		 AverageEpisodeLength = 6173.7001953125


249952 loss:0.8429644

INFO:absl: 
		 NumberOfEpisodes = 51
		 EnvironmentSteps = 250001
		 AverageReturn = -2356.9970703125
		 AverageEpisodeLength = 6173.7001953125


250967 loss:1.0827639

INFO:absl: 
		 NumberOfEpisodes = 51
		 EnvironmentSteps = 251001
		 AverageReturn = -2356.9970703125
		 AverageEpisodeLength = 6173.7001953125


251974 loss:0.7590345

INFO:absl: 
		 NumberOfEpisodes = 51
		 EnvironmentSteps = 252001
		 AverageReturn = -2356.9970703125
		 AverageEpisodeLength = 6173.7001953125


252982 loss:1.0894080

INFO:absl: 
		 NumberOfEpisodes = 51
		 EnvironmentSteps = 253001
		 AverageReturn = -2356.9970703125
		 AverageEpisodeLength = 6173.7001953125


253998 loss:0.6610516

INFO:absl: 
		 NumberOfEpisodes = 51
		 EnvironmentSteps = 254001
		 AverageReturn = -2356.9970703125
		 AverageEpisodeLength = 6173.7001953125


254932 loss:0.6560992

INFO:absl: 
		 NumberOfEpisodes = 52
		 EnvironmentSteps = 255001
		 AverageReturn = -2192.5283203125
		 AverageEpisodeLength = 6173.7001953125


255995 loss:0.7725366

INFO:absl: 
		 NumberOfEpisodes = 52
		 EnvironmentSteps = 256001
		 AverageReturn = -2192.5283203125
		 AverageEpisodeLength = 6173.7001953125


256991 loss:1.2835819

INFO:absl: 
		 NumberOfEpisodes = 52
		 EnvironmentSteps = 257001
		 AverageReturn = -2192.5283203125
		 AverageEpisodeLength = 6173.7001953125


257976 loss:1.3991437

INFO:absl: 
		 NumberOfEpisodes = 52
		 EnvironmentSteps = 258001
		 AverageReturn = -2192.5283203125
		 AverageEpisodeLength = 6173.7001953125


258981 loss:1.0517734

INFO:absl: 
		 NumberOfEpisodes = 52
		 EnvironmentSteps = 259001
		 AverageReturn = -2192.5283203125
		 AverageEpisodeLength = 6173.7001953125


259984 loss:1.4219974

INFO:absl: 
		 NumberOfEpisodes = 52
		 EnvironmentSteps = 260001
		 AverageReturn = -2192.5283203125
		 AverageEpisodeLength = 6173.7001953125


260984 loss:2.2544903

INFO:absl: 
		 NumberOfEpisodes = 52
		 EnvironmentSteps = 261001
		 AverageReturn = -2192.5283203125
		 AverageEpisodeLength = 6173.7001953125


261977 loss:1.0451089

INFO:absl: 
		 NumberOfEpisodes = 52
		 EnvironmentSteps = 262001
		 AverageReturn = -2192.5283203125
		 AverageEpisodeLength = 6173.7001953125


262980 loss:1.1401913

INFO:absl: 
		 NumberOfEpisodes = 52
		 EnvironmentSteps = 263001
		 AverageReturn = -2192.5283203125
		 AverageEpisodeLength = 6173.7001953125


263977 loss:0.5699700

INFO:absl: 
		 NumberOfEpisodes = 52
		 EnvironmentSteps = 264001
		 AverageReturn = -2192.5283203125
		 AverageEpisodeLength = 6173.7001953125


264990 loss:0.6788916

INFO:absl: 
		 NumberOfEpisodes = 53
		 EnvironmentSteps = 265001
		 AverageReturn = -2414.790771484375
		 AverageEpisodeLength = 6881.10009765625


265982 loss:1.1660392

INFO:absl: 
		 NumberOfEpisodes = 53
		 EnvironmentSteps = 266001
		 AverageReturn = -2414.790771484375
		 AverageEpisodeLength = 6881.10009765625


266983 loss:1.7970479

INFO:absl: 
		 NumberOfEpisodes = 53
		 EnvironmentSteps = 267001
		 AverageReturn = -2414.790771484375
		 AverageEpisodeLength = 6881.10009765625


267983 loss:1.0654275

INFO:absl: 
		 NumberOfEpisodes = 53
		 EnvironmentSteps = 268001
		 AverageReturn = -2414.790771484375
		 AverageEpisodeLength = 6881.10009765625


268961 loss:0.7507087

INFO:absl: 
		 NumberOfEpisodes = 53
		 EnvironmentSteps = 269001
		 AverageReturn = -2414.790771484375
		 AverageEpisodeLength = 6881.10009765625


269977 loss:0.9520477

INFO:absl: 
		 NumberOfEpisodes = 53
		 EnvironmentSteps = 270001
		 AverageReturn = -2414.790771484375
		 AverageEpisodeLength = 6881.10009765625


270973 loss:1.0474480

INFO:absl: 
		 NumberOfEpisodes = 53
		 EnvironmentSteps = 271001
		 AverageReturn = -2414.790771484375
		 AverageEpisodeLength = 6881.10009765625


271970 loss:0.3722351

INFO:absl: 
		 NumberOfEpisodes = 53
		 EnvironmentSteps = 272001
		 AverageReturn = -2414.790771484375
		 AverageEpisodeLength = 6881.10009765625


272976 loss:0.7180941

INFO:absl: 
		 NumberOfEpisodes = 53
		 EnvironmentSteps = 273001
		 AverageReturn = -2414.790771484375
		 AverageEpisodeLength = 6881.10009765625


273969 loss:0.8325462

INFO:absl: 
		 NumberOfEpisodes = 53
		 EnvironmentSteps = 274001
		 AverageReturn = -2414.790771484375
		 AverageEpisodeLength = 6881.10009765625


274966 loss:0.8542849

INFO:absl: 
		 NumberOfEpisodes = 54
		 EnvironmentSteps = 275001
		 AverageReturn = -2618.83837890625
		 AverageEpisodeLength = 7490.2001953125


275974 loss:1.4806617

INFO:absl: 
		 NumberOfEpisodes = 54
		 EnvironmentSteps = 276001
		 AverageReturn = -2618.83837890625
		 AverageEpisodeLength = 7490.2001953125


276979 loss:0.9785629

INFO:absl: 
		 NumberOfEpisodes = 54
		 EnvironmentSteps = 277001
		 AverageReturn = -2618.83837890625
		 AverageEpisodeLength = 7490.2001953125


277994 loss:1.4933829

INFO:absl: 
		 NumberOfEpisodes = 54
		 EnvironmentSteps = 278001
		 AverageReturn = -2618.83837890625
		 AverageEpisodeLength = 7490.2001953125


278938 loss:1.3564752

INFO:absl: 
		 NumberOfEpisodes = 54
		 EnvironmentSteps = 279001
		 AverageReturn = -2618.83837890625
		 AverageEpisodeLength = 7490.2001953125


279952 loss:1.1110380

INFO:absl: 
		 NumberOfEpisodes = 54
		 EnvironmentSteps = 280001
		 AverageReturn = -2618.83837890625
		 AverageEpisodeLength = 7490.2001953125


280956 loss:0.8530302

INFO:absl: 
		 NumberOfEpisodes = 54
		 EnvironmentSteps = 281001
		 AverageReturn = -2618.83837890625
		 AverageEpisodeLength = 7490.2001953125


281960 loss:1.5804532

INFO:absl: 
		 NumberOfEpisodes = 54
		 EnvironmentSteps = 282001
		 AverageReturn = -2618.83837890625
		 AverageEpisodeLength = 7490.2001953125


282954 loss:1.0262135

INFO:absl: 
		 NumberOfEpisodes = 54
		 EnvironmentSteps = 283001
		 AverageReturn = -2618.83837890625
		 AverageEpisodeLength = 7490.2001953125


283978 loss:1.4056065

INFO:absl: 
		 NumberOfEpisodes = 54
		 EnvironmentSteps = 284001
		 AverageReturn = -2618.83837890625
		 AverageEpisodeLength = 7490.2001953125


284940 loss:0.5275354

INFO:absl: 
		 NumberOfEpisodes = 55
		 EnvironmentSteps = 285001
		 AverageReturn = -2956.72021484375
		 AverageEpisodeLength = 8259.2998046875


285935 loss:0.9686056

INFO:absl: 
		 NumberOfEpisodes = 55
		 EnvironmentSteps = 286001
		 AverageReturn = -2956.72021484375
		 AverageEpisodeLength = 8259.2998046875


286986 loss:0.7629788

INFO:absl: 
		 NumberOfEpisodes = 55
		 EnvironmentSteps = 287001
		 AverageReturn = -2956.72021484375
		 AverageEpisodeLength = 8259.2998046875


287929 loss:0.8711287

INFO:absl: 
		 NumberOfEpisodes = 55
		 EnvironmentSteps = 288001
		 AverageReturn = -2956.72021484375
		 AverageEpisodeLength = 8259.2998046875


288965 loss:1.5860157

INFO:absl: 
		 NumberOfEpisodes = 55
		 EnvironmentSteps = 289001
		 AverageReturn = -2956.72021484375
		 AverageEpisodeLength = 8259.2998046875


289945 loss:1.2203602

INFO:absl: 
		 NumberOfEpisodes = 55
		 EnvironmentSteps = 290001
		 AverageReturn = -2956.72021484375
		 AverageEpisodeLength = 8259.2998046875


290965 loss:1.1922023

INFO:absl: 
		 NumberOfEpisodes = 55
		 EnvironmentSteps = 291001
		 AverageReturn = -2956.72021484375
		 AverageEpisodeLength = 8259.2998046875


291969 loss:0.9798518

INFO:absl: 
		 NumberOfEpisodes = 56
		 EnvironmentSteps = 292001
		 AverageReturn = -3066.787841796875
		 AverageEpisodeLength = 8505.900390625


292954 loss:1.4121938

INFO:absl: 
		 NumberOfEpisodes = 56
		 EnvironmentSteps = 293001
		 AverageReturn = -3066.787841796875
		 AverageEpisodeLength = 8505.900390625


293986 loss:1.5661104

INFO:absl: 
		 NumberOfEpisodes = 56
		 EnvironmentSteps = 294001
		 AverageReturn = -3066.787841796875
		 AverageEpisodeLength = 8505.900390625


294979 loss:1.0520345

INFO:absl: 
		 NumberOfEpisodes = 56
		 EnvironmentSteps = 295001
		 AverageReturn = -3066.787841796875
		 AverageEpisodeLength = 8505.900390625


295947 loss:1.4210625

INFO:absl: 
		 NumberOfEpisodes = 56
		 EnvironmentSteps = 296001
		 AverageReturn = -3066.787841796875
		 AverageEpisodeLength = 8505.900390625


296935 loss:0.6322200

INFO:absl: 
		 NumberOfEpisodes = 56
		 EnvironmentSteps = 297001
		 AverageReturn = -3066.787841796875
		 AverageEpisodeLength = 8505.900390625


297957 loss:0.7455696

INFO:absl: 
		 NumberOfEpisodes = 56
		 EnvironmentSteps = 298001
		 AverageReturn = -3066.787841796875
		 AverageEpisodeLength = 8505.900390625


298970 loss:1.2838996

INFO:absl: 
		 NumberOfEpisodes = 56
		 EnvironmentSteps = 299001
		 AverageReturn = -3066.787841796875
		 AverageEpisodeLength = 8505.900390625


299995 loss:0.7970164

INFO:absl: 
		 NumberOfEpisodes = 56
		 EnvironmentSteps = 300001
		 AverageReturn = -3066.787841796875
		 AverageEpisodeLength = 8505.900390625


300974 loss:1.1139393

INFO:absl: 
		 NumberOfEpisodes = 56
		 EnvironmentSteps = 301001
		 AverageReturn = -3066.787841796875
		 AverageEpisodeLength = 8505.900390625


301958 loss:0.9487062

INFO:absl: 
		 NumberOfEpisodes = 57
		 EnvironmentSteps = 302001
		 AverageReturn = -3485.66943359375
		 AverageEpisodeLength = 9505.7998046875


302930 loss:0.8370413

INFO:absl: 
		 NumberOfEpisodes = 57
		 EnvironmentSteps = 303001
		 AverageReturn = -3485.66943359375
		 AverageEpisodeLength = 9505.7998046875


303958 loss:0.9095921

INFO:absl: 
		 NumberOfEpisodes = 57
		 EnvironmentSteps = 304001
		 AverageReturn = -3485.66943359375
		 AverageEpisodeLength = 9505.7998046875


304948 loss:0.9156097

INFO:absl: 
		 NumberOfEpisodes = 57
		 EnvironmentSteps = 305001
		 AverageReturn = -3485.66943359375
		 AverageEpisodeLength = 9505.7998046875


305942 loss:0.64140

INFO:absl: 
		 NumberOfEpisodes = 57
		 EnvironmentSteps = 306001
		 AverageReturn = -3485.66943359375
		 AverageEpisodeLength = 9505.7998046875


306998 loss:0.6616211

INFO:absl: 
		 NumberOfEpisodes = 57
		 EnvironmentSteps = 307001
		 AverageReturn = -3485.66943359375
		 AverageEpisodeLength = 9505.7998046875


307960 loss:0.9617725

INFO:absl: 
		 NumberOfEpisodes = 57
		 EnvironmentSteps = 308001
		 AverageReturn = -3485.66943359375
		 AverageEpisodeLength = 9505.7998046875


308965 loss:1.4728182

INFO:absl: 
		 NumberOfEpisodes = 57
		 EnvironmentSteps = 309001
		 AverageReturn = -3485.66943359375
		 AverageEpisodeLength = 9505.7998046875


309988 loss:0.7467554

INFO:absl: 
		 NumberOfEpisodes = 57
		 EnvironmentSteps = 310001
		 AverageReturn = -3485.66943359375
		 AverageEpisodeLength = 9505.7998046875


310952 loss:1.77040

INFO:absl: 
		 NumberOfEpisodes = 57
		 EnvironmentSteps = 311001
		 AverageReturn = -3485.66943359375
		 AverageEpisodeLength = 9505.7998046875


311990 loss:1.5301919

INFO:absl: 
		 NumberOfEpisodes = 58
		 EnvironmentSteps = 312001
		 AverageReturn = -3502.4140625
		 AverageEpisodeLength = 9663.2998046875


312986 loss:0.51400

INFO:absl: 
		 NumberOfEpisodes = 58
		 EnvironmentSteps = 313001
		 AverageReturn = -3502.4140625
		 AverageEpisodeLength = 9663.2998046875


313973 loss:1.53602

INFO:absl: 
		 NumberOfEpisodes = 58
		 EnvironmentSteps = 314001
		 AverageReturn = -3502.4140625
		 AverageEpisodeLength = 9663.2998046875


314965 loss:0.7049830

INFO:absl: 
		 NumberOfEpisodes = 59
		 EnvironmentSteps = 315001
		 AverageReturn = -3352.306640625
		 AverageEpisodeLength = 8922.7998046875


315969 loss:0.67794

INFO:absl: 
		 NumberOfEpisodes = 59
		 EnvironmentSteps = 316001
		 AverageReturn = -3352.306640625
		 AverageEpisodeLength = 8922.7998046875


316950 loss:1.09260

INFO:absl: 
		 NumberOfEpisodes = 59
		 EnvironmentSteps = 317001
		 AverageReturn = -3352.306640625
		 AverageEpisodeLength = 8922.7998046875


317993 loss:2.23515

INFO:absl: 
		 NumberOfEpisodes = 59
		 EnvironmentSteps = 318001
		 AverageReturn = -3352.306640625
		 AverageEpisodeLength = 8922.7998046875


318944 loss:0.5889550

INFO:absl: 
		 NumberOfEpisodes = 59
		 EnvironmentSteps = 319001
		 AverageReturn = -3352.306640625
		 AverageEpisodeLength = 8922.7998046875


319935 loss:2.25343

INFO:absl: 
		 NumberOfEpisodes = 59
		 EnvironmentSteps = 320001
		 AverageReturn = -3352.306640625
		 AverageEpisodeLength = 8922.7998046875


320991 loss:1.2738649

INFO:absl: 
		 NumberOfEpisodes = 59
		 EnvironmentSteps = 321001
		 AverageReturn = -3352.306640625
		 AverageEpisodeLength = 8922.7998046875


321979 loss:1.4545258

INFO:absl: 
		 NumberOfEpisodes = 59
		 EnvironmentSteps = 322001
		 AverageReturn = -3352.306640625
		 AverageEpisodeLength = 8922.7998046875


322947 loss:1.1703283

INFO:absl: 
		 NumberOfEpisodes = 59
		 EnvironmentSteps = 323001
		 AverageReturn = -3352.306640625
		 AverageEpisodeLength = 8922.7998046875


323929 loss:0.9498138

INFO:absl: 
		 NumberOfEpisodes = 59
		 EnvironmentSteps = 324001
		 AverageReturn = -3352.306640625
		 AverageEpisodeLength = 8922.7998046875


324991 loss:0.5666805

INFO:absl: 
		 NumberOfEpisodes = 60
		 EnvironmentSteps = 325001
		 AverageReturn = -3342.05810546875
		 AverageEpisodeLength = 8922.7998046875


325944 loss:0.7541391

INFO:absl: 
		 NumberOfEpisodes = 60
		 EnvironmentSteps = 326001
		 AverageReturn = -3342.05810546875
		 AverageEpisodeLength = 8922.7998046875


326976 loss:1.68689

INFO:absl: 
		 NumberOfEpisodes = 60
		 EnvironmentSteps = 327001
		 AverageReturn = -3342.05810546875
		 AverageEpisodeLength = 8922.7998046875


327976 loss:1.3197386

INFO:absl: 
		 NumberOfEpisodes = 60
		 EnvironmentSteps = 328001
		 AverageReturn = -3342.05810546875
		 AverageEpisodeLength = 8922.7998046875


328986 loss:0.57385

INFO:absl: 
		 NumberOfEpisodes = 60
		 EnvironmentSteps = 329001
		 AverageReturn = -3342.05810546875
		 AverageEpisodeLength = 8922.7998046875


329966 loss:1.0806961

INFO:absl: 
		 NumberOfEpisodes = 60
		 EnvironmentSteps = 330001
		 AverageReturn = -3342.05810546875
		 AverageEpisodeLength = 8922.7998046875


330988 loss:1.2173385

INFO:absl: 
		 NumberOfEpisodes = 60
		 EnvironmentSteps = 331001
		 AverageReturn = -3342.05810546875
		 AverageEpisodeLength = 8922.7998046875


331972 loss:0.5581930

INFO:absl: 
		 NumberOfEpisodes = 60
		 EnvironmentSteps = 332001
		 AverageReturn = -3342.05810546875
		 AverageEpisodeLength = 8922.7998046875


332975 loss:0.4565017

INFO:absl: 
		 NumberOfEpisodes = 60
		 EnvironmentSteps = 333001
		 AverageReturn = -3342.05810546875
		 AverageEpisodeLength = 8922.7998046875


333997 loss:0.8811040

INFO:absl: 
		 NumberOfEpisodes = 61
		 EnvironmentSteps = 334001
		 AverageReturn = -3243.794189453125
		 AverageEpisodeLength = 8891.7001953125


334948 loss:1.55721

INFO:absl: 
		 NumberOfEpisodes = 61
		 EnvironmentSteps = 335001
		 AverageReturn = -3243.794189453125
		 AverageEpisodeLength = 8891.7001953125


335963 loss:0.80052

INFO:absl: 
		 NumberOfEpisodes = 61
		 EnvironmentSteps = 336001
		 AverageReturn = -3243.794189453125
		 AverageEpisodeLength = 8891.7001953125


336971 loss:0.64217

INFO:absl: 
		 NumberOfEpisodes = 61
		 EnvironmentSteps = 337001
		 AverageReturn = -3243.794189453125
		 AverageEpisodeLength = 8891.7001953125


337949 loss:0.4319758

INFO:absl: 
		 NumberOfEpisodes = 61
		 EnvironmentSteps = 338001
		 AverageReturn = -3243.794189453125
		 AverageEpisodeLength = 8891.7001953125


338971 loss:0.5664105

INFO:absl: 
		 NumberOfEpisodes = 61
		 EnvironmentSteps = 339001
		 AverageReturn = -3243.794189453125
		 AverageEpisodeLength = 8891.7001953125


339945 loss:1.1002387

INFO:absl: 
		 NumberOfEpisodes = 61
		 EnvironmentSteps = 340001
		 AverageReturn = -3243.794189453125
		 AverageEpisodeLength = 8891.7001953125


340998 loss:0.72484

INFO:absl: 
		 NumberOfEpisodes = 61
		 EnvironmentSteps = 341001
		 AverageReturn = -3243.794189453125
		 AverageEpisodeLength = 8891.7001953125


341981 loss:0.3180882

INFO:absl: 
		 NumberOfEpisodes = 61
		 EnvironmentSteps = 342001
		 AverageReturn = -3243.794189453125
		 AverageEpisodeLength = 8891.7001953125


342962 loss:0.9935626

INFO:absl: 
		 NumberOfEpisodes = 61
		 EnvironmentSteps = 343001
		 AverageReturn = -3243.794189453125
		 AverageEpisodeLength = 8891.7001953125


343924 loss:0.9271321

INFO:absl: 
		 NumberOfEpisodes = 62
		 EnvironmentSteps = 344001
		 AverageReturn = -3158.666015625
		 AverageEpisodeLength = 8891.7001953125


344998 loss:1.0218078

INFO:absl: 
		 NumberOfEpisodes = 62
		 EnvironmentSteps = 345001
		 AverageReturn = -3158.666015625
		 AverageEpisodeLength = 8891.7001953125


345981 loss:0.9213911

INFO:absl: 
		 NumberOfEpisodes = 62
		 EnvironmentSteps = 346001
		 AverageReturn = -3158.666015625
		 AverageEpisodeLength = 8891.7001953125


346957 loss:1.0393137

INFO:absl: 
		 NumberOfEpisodes = 62
		 EnvironmentSteps = 347001
		 AverageReturn = -3158.666015625
		 AverageEpisodeLength = 8891.7001953125


347960 loss:0.9121358

INFO:absl: 
		 NumberOfEpisodes = 63
		 EnvironmentSteps = 348001
		 AverageReturn = -2937.080810546875
		 AverageEpisodeLength = 8239.7998046875


348931 loss:0.8489626

INFO:absl: 
		 NumberOfEpisodes = 63
		 EnvironmentSteps = 349001
		 AverageReturn = -2937.080810546875
		 AverageEpisodeLength = 8239.7998046875


349943 loss:0.6867875

INFO:absl: 
		 NumberOfEpisodes = 63
		 EnvironmentSteps = 350001
		 AverageReturn = -2937.080810546875
		 AverageEpisodeLength = 8239.7998046875


350989 loss:0.65255

INFO:absl: 
		 NumberOfEpisodes = 63
		 EnvironmentSteps = 351001
		 AverageReturn = -2937.080810546875
		 AverageEpisodeLength = 8239.7998046875


351984 loss:0.5892953

INFO:absl: 
		 NumberOfEpisodes = 63
		 EnvironmentSteps = 352001
		 AverageReturn = -2937.080810546875
		 AverageEpisodeLength = 8239.7998046875


352936 loss:0.9112178

INFO:absl: 
		 NumberOfEpisodes = 63
		 EnvironmentSteps = 353001
		 AverageReturn = -2937.080810546875
		 AverageEpisodeLength = 8239.7998046875


353986 loss:0.7545136

INFO:absl: 
		 NumberOfEpisodes = 63
		 EnvironmentSteps = 354001
		 AverageReturn = -2937.080810546875
		 AverageEpisodeLength = 8239.7998046875


354977 loss:1.1758503

INFO:absl: 
		 NumberOfEpisodes = 63
		 EnvironmentSteps = 355001
		 AverageReturn = -2937.080810546875
		 AverageEpisodeLength = 8239.7998046875


355979 loss:0.2887237

INFO:absl: 
		 NumberOfEpisodes = 63
		 EnvironmentSteps = 356001
		 AverageReturn = -2937.080810546875
		 AverageEpisodeLength = 8239.7998046875


356938 loss:0.3355933

INFO:absl: 
		 NumberOfEpisodes = 63
		 EnvironmentSteps = 357001
		 AverageReturn = -2937.080810546875
		 AverageEpisodeLength = 8239.7998046875


357965 loss:0.7200080

INFO:absl: 
		 NumberOfEpisodes = 64
		 EnvironmentSteps = 358001
		 AverageReturn = -2942.72607421875
		 AverageEpisodeLength = 8239.7998046875


358966 loss:1.1291349

INFO:absl: 
		 NumberOfEpisodes = 64
		 EnvironmentSteps = 359001
		 AverageReturn = -2942.72607421875
		 AverageEpisodeLength = 8239.7998046875


359980 loss:1.02783

INFO:absl: 
		 NumberOfEpisodes = 64
		 EnvironmentSteps = 360001
		 AverageReturn = -2942.72607421875
		 AverageEpisodeLength = 8239.7998046875


360969 loss:0.6278618

INFO:absl: 
		 NumberOfEpisodes = 64
		 EnvironmentSteps = 361001
		 AverageReturn = -2942.72607421875
		 AverageEpisodeLength = 8239.7998046875


361967 loss:0.6828806

INFO:absl: 
		 NumberOfEpisodes = 64
		 EnvironmentSteps = 362001
		 AverageReturn = -2942.72607421875
		 AverageEpisodeLength = 8239.7998046875


362978 loss:1.1250422

INFO:absl: 
		 NumberOfEpisodes = 64
		 EnvironmentSteps = 363001
		 AverageReturn = -2942.72607421875
		 AverageEpisodeLength = 8239.7998046875


363971 loss:1.21517

INFO:absl: 
		 NumberOfEpisodes = 64
		 EnvironmentSteps = 364001
		 AverageReturn = -2942.72607421875
		 AverageEpisodeLength = 8239.7998046875


364939 loss:1.2357851

INFO:absl: 
		 NumberOfEpisodes = 64
		 EnvironmentSteps = 365001
		 AverageReturn = -2942.72607421875
		 AverageEpisodeLength = 8239.7998046875


365930 loss:0.9276478

INFO:absl: 
		 NumberOfEpisodes = 64
		 EnvironmentSteps = 366001
		 AverageReturn = -2942.72607421875
		 AverageEpisodeLength = 8239.7998046875


366953 loss:1.3182607

INFO:absl: 
		 NumberOfEpisodes = 65
		 EnvironmentSteps = 367001
		 AverageReturn = -2948.409423828125
		 AverageEpisodeLength = 8209.900390625


367966 loss:0.6904544

INFO:absl: 
		 NumberOfEpisodes = 65
		 EnvironmentSteps = 368001
		 AverageReturn = -2948.409423828125
		 AverageEpisodeLength = 8209.900390625


368969 loss:0.6934667

INFO:absl: 
		 NumberOfEpisodes = 65
		 EnvironmentSteps = 369001
		 AverageReturn = -2948.409423828125
		 AverageEpisodeLength = 8209.900390625


369972 loss:0.3589391

INFO:absl: 
		 NumberOfEpisodes = 65
		 EnvironmentSteps = 370001
		 AverageReturn = -2948.409423828125
		 AverageEpisodeLength = 8209.900390625


370963 loss:0.6002737

INFO:absl: 
		 NumberOfEpisodes = 65
		 EnvironmentSteps = 371001
		 AverageReturn = -2948.409423828125
		 AverageEpisodeLength = 8209.900390625


371978 loss:1.3503483

INFO:absl: 
		 NumberOfEpisodes = 65
		 EnvironmentSteps = 372001
		 AverageReturn = -2948.409423828125
		 AverageEpisodeLength = 8209.900390625


372973 loss:1.1212784

INFO:absl: 
		 NumberOfEpisodes = 65
		 EnvironmentSteps = 373001
		 AverageReturn = -2948.409423828125
		 AverageEpisodeLength = 8209.900390625


373967 loss:1.5830596

INFO:absl: 
		 NumberOfEpisodes = 65
		 EnvironmentSteps = 374001
		 AverageReturn = -2948.409423828125
		 AverageEpisodeLength = 8209.900390625


374940 loss:0.9354638

INFO:absl: 
		 NumberOfEpisodes = 65
		 EnvironmentSteps = 375001
		 AverageReturn = -2948.409423828125
		 AverageEpisodeLength = 8209.900390625


375968 loss:0.6060309

INFO:absl: 
		 NumberOfEpisodes = 65
		 EnvironmentSteps = 376001
		 AverageReturn = -2948.409423828125
		 AverageEpisodeLength = 8209.900390625


376988 loss:0.5003089

INFO:absl: 
		 NumberOfEpisodes = 66
		 EnvironmentSteps = 377001
		 AverageReturn = -2897.796142578125
		 AverageEpisodeLength = 8546.599609375


377978 loss:1.0380307

INFO:absl: 
		 NumberOfEpisodes = 66
		 EnvironmentSteps = 378001
		 AverageReturn = -2897.796142578125
		 AverageEpisodeLength = 8546.599609375


378983 loss:0.4371008

INFO:absl: 
		 NumberOfEpisodes = 66
		 EnvironmentSteps = 379001
		 AverageReturn = -2897.796142578125
		 AverageEpisodeLength = 8546.599609375


379990 loss:1.2911889

INFO:absl: 
		 NumberOfEpisodes = 66
		 EnvironmentSteps = 380001
		 AverageReturn = -2897.796142578125
		 AverageEpisodeLength = 8546.599609375


380988 loss:0.6491972

INFO:absl: 
		 NumberOfEpisodes = 66
		 EnvironmentSteps = 381001
		 AverageReturn = -2897.796142578125
		 AverageEpisodeLength = 8546.599609375


381937 loss:1.1659838

INFO:absl: 
		 NumberOfEpisodes = 66
		 EnvironmentSteps = 382001
		 AverageReturn = -2897.796142578125
		 AverageEpisodeLength = 8546.599609375


382959 loss:0.4938834

INFO:absl: 
		 NumberOfEpisodes = 66
		 EnvironmentSteps = 383001
		 AverageReturn = -2897.796142578125
		 AverageEpisodeLength = 8546.599609375


383949 loss:1.0028663

INFO:absl: 
		 NumberOfEpisodes = 66
		 EnvironmentSteps = 384001
		 AverageReturn = -2897.796142578125
		 AverageEpisodeLength = 8546.599609375


384943 loss:0.5792146

INFO:absl: 
		 NumberOfEpisodes = 66
		 EnvironmentSteps = 385001
		 AverageReturn = -2897.796142578125
		 AverageEpisodeLength = 8546.599609375


385988 loss:0.4528585

INFO:absl: 
		 NumberOfEpisodes = 66
		 EnvironmentSteps = 386001
		 AverageReturn = -2897.796142578125
		 AverageEpisodeLength = 8546.599609375


386947 loss:0.4427049

INFO:absl: 
		 NumberOfEpisodes = 67
		 EnvironmentSteps = 387001
		 AverageReturn = -2682.87158203125
		 AverageEpisodeLength = 8546.599609375


387943 loss:0.5846779

INFO:absl: 
		 NumberOfEpisodes = 67
		 EnvironmentSteps = 388001
		 AverageReturn = -2682.87158203125
		 AverageEpisodeLength = 8546.599609375


388962 loss:1.0591735

INFO:absl: 
		 NumberOfEpisodes = 67
		 EnvironmentSteps = 389001
		 AverageReturn = -2682.87158203125
		 AverageEpisodeLength = 8546.599609375


389976 loss:0.6476178

INFO:absl: 
		 NumberOfEpisodes = 67
		 EnvironmentSteps = 390001
		 AverageReturn = -2682.87158203125
		 AverageEpisodeLength = 8546.599609375


390939 loss:0.6478236

INFO:absl: 
		 NumberOfEpisodes = 67
		 EnvironmentSteps = 391001
		 AverageReturn = -2682.87158203125
		 AverageEpisodeLength = 8546.599609375


391980 loss:0.6984216

INFO:absl: 
		 NumberOfEpisodes = 67
		 EnvironmentSteps = 392001
		 AverageReturn = -2682.87158203125
		 AverageEpisodeLength = 8546.599609375


392940 loss:0.57045

INFO:absl: 
		 NumberOfEpisodes = 67
		 EnvironmentSteps = 393001
		 AverageReturn = -2682.87158203125
		 AverageEpisodeLength = 8546.599609375


393998 loss:0.7828945

INFO:absl: 
		 NumberOfEpisodes = 67
		 EnvironmentSteps = 394001
		 AverageReturn = -2682.87158203125
		 AverageEpisodeLength = 8546.599609375


394979 loss:1.2490488

INFO:absl: 
		 NumberOfEpisodes = 67
		 EnvironmentSteps = 395001
		 AverageReturn = -2682.87158203125
		 AverageEpisodeLength = 8546.599609375


395946 loss:0.8176679

INFO:absl: 
		 NumberOfEpisodes = 67
		 EnvironmentSteps = 396001
		 AverageReturn = -2682.87158203125
		 AverageEpisodeLength = 8546.599609375


396991 loss:0.7436177

INFO:absl: 
		 NumberOfEpisodes = 68
		 EnvironmentSteps = 397001
		 AverageReturn = -2720.795654296875
		 AverageEpisodeLength = 8546.599609375


397940 loss:0.4768946

INFO:absl: 
		 NumberOfEpisodes = 68
		 EnvironmentSteps = 398001
		 AverageReturn = -2720.795654296875
		 AverageEpisodeLength = 8546.599609375


398970 loss:448.51840

INFO:absl: 
		 NumberOfEpisodes = 68
		 EnvironmentSteps = 399001
		 AverageReturn = -2720.795654296875
		 AverageEpisodeLength = 8546.599609375


399943 loss:1.0680615

INFO:absl: 
		 NumberOfEpisodes = 68
		 EnvironmentSteps = 400001
		 AverageReturn = -2720.795654296875
		 AverageEpisodeLength = 8546.599609375


400976 loss:0.4257684

INFO:absl: 
		 NumberOfEpisodes = 68
		 EnvironmentSteps = 401001
		 AverageReturn = -2720.795654296875
		 AverageEpisodeLength = 8546.599609375


401999 loss:0.9034967

INFO:absl: 
		 NumberOfEpisodes = 68
		 EnvironmentSteps = 402001
		 AverageReturn = -2720.795654296875
		 AverageEpisodeLength = 8546.599609375


402990 loss:0.8140804

INFO:absl: 
		 NumberOfEpisodes = 68
		 EnvironmentSteps = 403001
		 AverageReturn = -2720.795654296875
		 AverageEpisodeLength = 8546.599609375


403956 loss:0.3737168

INFO:absl: 
		 NumberOfEpisodes = 68
		 EnvironmentSteps = 404001
		 AverageReturn = -2720.795654296875
		 AverageEpisodeLength = 8546.599609375


404941 loss:1.4224594

INFO:absl: 
		 NumberOfEpisodes = 68
		 EnvironmentSteps = 405001
		 AverageReturn = -2720.795654296875
		 AverageEpisodeLength = 8546.599609375


405968 loss:0.6092492

INFO:absl: 
		 NumberOfEpisodes = 68
		 EnvironmentSteps = 406001
		 AverageReturn = -2720.795654296875
		 AverageEpisodeLength = 8546.599609375


406951 loss:0.8839247

INFO:absl: 
		 NumberOfEpisodes = 69
		 EnvironmentSteps = 407001
		 AverageReturn = -2918.05078125
		 AverageEpisodeLength = 9287.099609375


407965 loss:0.57691

INFO:absl: 
		 NumberOfEpisodes = 69
		 EnvironmentSteps = 408001
		 AverageReturn = -2918.05078125
		 AverageEpisodeLength = 9287.099609375


408972 loss:0.8555672

INFO:absl: 
		 NumberOfEpisodes = 69
		 EnvironmentSteps = 409001
		 AverageReturn = -2918.05078125
		 AverageEpisodeLength = 9287.099609375


409942 loss:0.4990122

INFO:absl: 
		 NumberOfEpisodes = 69
		 EnvironmentSteps = 410001
		 AverageReturn = -2918.05078125
		 AverageEpisodeLength = 9287.099609375


410971 loss:0.6910931

INFO:absl: 
		 NumberOfEpisodes = 69
		 EnvironmentSteps = 411001
		 AverageReturn = -2918.05078125
		 AverageEpisodeLength = 9287.099609375


411961 loss:1.2636190

INFO:absl: 
		 NumberOfEpisodes = 69
		 EnvironmentSteps = 412001
		 AverageReturn = -2918.05078125
		 AverageEpisodeLength = 9287.099609375


412966 loss:0.7439006

INFO:absl: 
		 NumberOfEpisodes = 69
		 EnvironmentSteps = 413001
		 AverageReturn = -2918.05078125
		 AverageEpisodeLength = 9287.099609375


413937 loss:0.63833

INFO:absl: 
		 NumberOfEpisodes = 69
		 EnvironmentSteps = 414001
		 AverageReturn = -2918.05078125
		 AverageEpisodeLength = 9287.099609375


414961 loss:0.4660293

INFO:absl: 
		 NumberOfEpisodes = 69
		 EnvironmentSteps = 415001
		 AverageReturn = -2918.05078125
		 AverageEpisodeLength = 9287.099609375


415981 loss:1.0394789

INFO:absl: 
		 NumberOfEpisodes = 71
		 EnvironmentSteps = 416001
		 AverageReturn = -2544.54345703125
		 AverageEpisodeLength = 8144.7001953125


416947 loss:0.5966502

INFO:absl: 
		 NumberOfEpisodes = 71
		 EnvironmentSteps = 417001
		 AverageReturn = -2544.54345703125
		 AverageEpisodeLength = 8144.7001953125


417963 loss:1.2750867

INFO:absl: 
		 NumberOfEpisodes = 71
		 EnvironmentSteps = 418001
		 AverageReturn = -2544.54345703125
		 AverageEpisodeLength = 8144.7001953125


418958 loss:0.5461283

INFO:absl: 
		 NumberOfEpisodes = 71
		 EnvironmentSteps = 419001
		 AverageReturn = -2544.54345703125
		 AverageEpisodeLength = 8144.7001953125


419954 loss:0.9719791

INFO:absl: 
		 NumberOfEpisodes = 71
		 EnvironmentSteps = 420001
		 AverageReturn = -2544.54345703125
		 AverageEpisodeLength = 8144.7001953125


420948 loss:0.8278422

INFO:absl: 
		 NumberOfEpisodes = 71
		 EnvironmentSteps = 421001
		 AverageReturn = -2544.54345703125
		 AverageEpisodeLength = 8144.7001953125


421942 loss:1.1085796

INFO:absl: 
		 NumberOfEpisodes = 71
		 EnvironmentSteps = 422001
		 AverageReturn = -2544.54345703125
		 AverageEpisodeLength = 8144.7001953125


422993 loss:1.2832173

INFO:absl: 
		 NumberOfEpisodes = 71
		 EnvironmentSteps = 423001
		 AverageReturn = -2544.54345703125
		 AverageEpisodeLength = 8144.7001953125


423937 loss:1.0062881

INFO:absl: 
		 NumberOfEpisodes = 71
		 EnvironmentSteps = 424001
		 AverageReturn = -2544.54345703125
		 AverageEpisodeLength = 8144.7001953125


424979 loss:0.7204970

INFO:absl: 
		 NumberOfEpisodes = 72
		 EnvironmentSteps = 425001
		 AverageReturn = -2611.416015625
		 AverageEpisodeLength = 8043.2001953125


425981 loss:0.5284800

INFO:absl: 
		 NumberOfEpisodes = 72
		 EnvironmentSteps = 426001
		 AverageReturn = -2611.416015625
		 AverageEpisodeLength = 8043.2001953125


426942 loss:0.7270409

INFO:absl: 
		 NumberOfEpisodes = 72
		 EnvironmentSteps = 427001
		 AverageReturn = -2611.416015625
		 AverageEpisodeLength = 8043.2001953125


427957 loss:1.2634333

INFO:absl: 
		 NumberOfEpisodes = 72
		 EnvironmentSteps = 428001
		 AverageReturn = -2611.416015625
		 AverageEpisodeLength = 8043.2001953125


428935 loss:1.2444132

INFO:absl: 
		 NumberOfEpisodes = 72
		 EnvironmentSteps = 429001
		 AverageReturn = -2611.416015625
		 AverageEpisodeLength = 8043.2001953125


429943 loss:0.4841847

INFO:absl: 
		 NumberOfEpisodes = 72
		 EnvironmentSteps = 430001
		 AverageReturn = -2611.416015625
		 AverageEpisodeLength = 8043.2001953125


430993 loss:0.8580342

INFO:absl: 
		 NumberOfEpisodes = 72
		 EnvironmentSteps = 431001
		 AverageReturn = -2611.416015625
		 AverageEpisodeLength = 8043.2001953125


431939 loss:0.6106182

INFO:absl: 
		 NumberOfEpisodes = 72
		 EnvironmentSteps = 432001
		 AverageReturn = -2611.416015625
		 AverageEpisodeLength = 8043.2001953125


432941 loss:1.0906940

INFO:absl: 
		 NumberOfEpisodes = 72
		 EnvironmentSteps = 433001
		 AverageReturn = -2611.416015625
		 AverageEpisodeLength = 8043.2001953125


433961 loss:1.3759705

INFO:absl: 
		 NumberOfEpisodes = 72
		 EnvironmentSteps = 434001
		 AverageReturn = -2611.416015625
		 AverageEpisodeLength = 8043.2001953125


434974 loss:0.7253623

INFO:absl: 
		 NumberOfEpisodes = 73
		 EnvironmentSteps = 435001
		 AverageReturn = -2843.856689453125
		 AverageEpisodeLength = 8695.099609375


435998 loss:0.7188476

INFO:absl: 
		 NumberOfEpisodes = 73
		 EnvironmentSteps = 436001
		 AverageReturn = -2843.856689453125
		 AverageEpisodeLength = 8695.099609375


436993 loss:0.6795074

INFO:absl: 
		 NumberOfEpisodes = 73
		 EnvironmentSteps = 437001
		 AverageReturn = -2843.856689453125
		 AverageEpisodeLength = 8695.099609375


437959 loss:1.0751479

INFO:absl: 
		 NumberOfEpisodes = 73
		 EnvironmentSteps = 438001
		 AverageReturn = -2843.856689453125
		 AverageEpisodeLength = 8695.099609375


438937 loss:1.3751853

INFO:absl: 
		 NumberOfEpisodes = 73
		 EnvironmentSteps = 439001
		 AverageReturn = -2843.856689453125
		 AverageEpisodeLength = 8695.099609375


439966 loss:1.1060083

INFO:absl: 
		 NumberOfEpisodes = 73
		 EnvironmentSteps = 440001
		 AverageReturn = -2843.856689453125
		 AverageEpisodeLength = 8695.099609375


440995 loss:0.8214646

INFO:absl: 
		 NumberOfEpisodes = 73
		 EnvironmentSteps = 441001
		 AverageReturn = -2843.856689453125
		 AverageEpisodeLength = 8695.099609375


441970 loss:0.9430934

INFO:absl: 
		 NumberOfEpisodes = 73
		 EnvironmentSteps = 442001
		 AverageReturn = -2843.856689453125
		 AverageEpisodeLength = 8695.099609375


442972 loss:1.3492983

INFO:absl: 
		 NumberOfEpisodes = 73
		 EnvironmentSteps = 443001
		 AverageReturn = -2843.856689453125
		 AverageEpisodeLength = 8695.099609375


443971 loss:0.9279627

INFO:absl: 
		 NumberOfEpisodes = 73
		 EnvironmentSteps = 444001
		 AverageReturn = -2843.856689453125
		 AverageEpisodeLength = 8695.099609375


444980 loss:0.8026910

INFO:absl: 
		 NumberOfEpisodes = 74
		 EnvironmentSteps = 445001
		 AverageReturn = -2860.497314453125
		 AverageEpisodeLength = 8695.099609375


445938 loss:0.7077400

INFO:absl: 
		 NumberOfEpisodes = 74
		 EnvironmentSteps = 446001
		 AverageReturn = -2860.497314453125
		 AverageEpisodeLength = 8695.099609375


446984 loss:0.8147893

INFO:absl: 
		 NumberOfEpisodes = 74
		 EnvironmentSteps = 447001
		 AverageReturn = -2860.497314453125
		 AverageEpisodeLength = 8695.099609375


447963 loss:0.3700805

INFO:absl: 
		 NumberOfEpisodes = 74
		 EnvironmentSteps = 448001
		 AverageReturn = -2860.497314453125
		 AverageEpisodeLength = 8695.099609375


448977 loss:0.7080945

INFO:absl: 
		 NumberOfEpisodes = 74
		 EnvironmentSteps = 449001
		 AverageReturn = -2860.497314453125
		 AverageEpisodeLength = 8695.099609375


449991 loss:0.8482511

INFO:absl: 
		 NumberOfEpisodes = 74
		 EnvironmentSteps = 450001
		 AverageReturn = -2860.497314453125
		 AverageEpisodeLength = 8695.099609375


450994 loss:0.6787511

INFO:absl: 
		 NumberOfEpisodes = 74
		 EnvironmentSteps = 451001
		 AverageReturn = -2860.497314453125
		 AverageEpisodeLength = 8695.099609375


451949 loss:0.9120430

INFO:absl: 
		 NumberOfEpisodes = 74
		 EnvironmentSteps = 452001
		 AverageReturn = -2860.497314453125
		 AverageEpisodeLength = 8695.099609375


452995 loss:1.07212

INFO:absl: 
		 NumberOfEpisodes = 74
		 EnvironmentSteps = 453001
		 AverageReturn = -2860.497314453125
		 AverageEpisodeLength = 8695.099609375


453935 loss:0.8220910

INFO:absl: 
		 NumberOfEpisodes = 74
		 EnvironmentSteps = 454001
		 AverageReturn = -2860.497314453125
		 AverageEpisodeLength = 8695.099609375


454951 loss:1.0419762

INFO:absl: 
		 NumberOfEpisodes = 75
		 EnvironmentSteps = 455001
		 AverageReturn = -2556.481689453125
		 AverageEpisodeLength = 8725.0


455953 loss:1.03984

INFO:absl: 
		 NumberOfEpisodes = 75
		 EnvironmentSteps = 456001
		 AverageReturn = -2556.481689453125
		 AverageEpisodeLength = 8725.0


456948 loss:1.2031641

INFO:absl: 
		 NumberOfEpisodes = 75
		 EnvironmentSteps = 457001
		 AverageReturn = -2556.481689453125
		 AverageEpisodeLength = 8725.0


457936 loss:0.7068835

INFO:absl: 
		 NumberOfEpisodes = 75
		 EnvironmentSteps = 458001
		 AverageReturn = -2556.481689453125
		 AverageEpisodeLength = 8725.0


458964 loss:0.5289718

INFO:absl: 
		 NumberOfEpisodes = 75
		 EnvironmentSteps = 459001
		 AverageReturn = -2556.481689453125
		 AverageEpisodeLength = 8725.0


459999 loss:0.7658346

INFO:absl: 
		 NumberOfEpisodes = 75
		 EnvironmentSteps = 460001
		 AverageReturn = -2556.481689453125
		 AverageEpisodeLength = 8725.0


460976 loss:0.7471307

INFO:absl: 
		 NumberOfEpisodes = 75
		 EnvironmentSteps = 461001
		 AverageReturn = -2556.481689453125
		 AverageEpisodeLength = 8725.0


461976 loss:1.3942285

INFO:absl: 
		 NumberOfEpisodes = 75
		 EnvironmentSteps = 462001
		 AverageReturn = -2556.481689453125
		 AverageEpisodeLength = 8725.0


462995 loss:0.6637203

INFO:absl: 
		 NumberOfEpisodes = 75
		 EnvironmentSteps = 463001
		 AverageReturn = -2556.481689453125
		 AverageEpisodeLength = 8725.0


463967 loss:0.7393479

INFO:absl: 
		 NumberOfEpisodes = 75
		 EnvironmentSteps = 464001
		 AverageReturn = -2556.481689453125
		 AverageEpisodeLength = 8725.0


464941 loss:1.3268974

INFO:absl: 
		 NumberOfEpisodes = 77
		 EnvironmentSteps = 465001
		 AverageReturn = -2380.676513671875
		 AverageEpisodeLength = 7725.10009765625


465984 loss:0.5960124

INFO:absl: 
		 NumberOfEpisodes = 77
		 EnvironmentSteps = 466001
		 AverageReturn = -2380.676513671875
		 AverageEpisodeLength = 7725.10009765625


466957 loss:1.3602817

INFO:absl: 
		 NumberOfEpisodes = 77
		 EnvironmentSteps = 467001
		 AverageReturn = -2380.676513671875
		 AverageEpisodeLength = 7725.10009765625


467938 loss:1.2875631

INFO:absl: 
		 NumberOfEpisodes = 77
		 EnvironmentSteps = 468001
		 AverageReturn = -2380.676513671875
		 AverageEpisodeLength = 7725.10009765625


468973 loss:0.4801382

INFO:absl: 
		 NumberOfEpisodes = 77
		 EnvironmentSteps = 469001
		 AverageReturn = -2380.676513671875
		 AverageEpisodeLength = 7725.10009765625


469971 loss:0.6743175

INFO:absl: 
		 NumberOfEpisodes = 77
		 EnvironmentSteps = 470001
		 AverageReturn = -2380.676513671875
		 AverageEpisodeLength = 7725.10009765625


470970 loss:0.9960122

INFO:absl: 
		 NumberOfEpisodes = 77
		 EnvironmentSteps = 471001
		 AverageReturn = -2380.676513671875
		 AverageEpisodeLength = 7725.10009765625


471964 loss:0.7285622

INFO:absl: 
		 NumberOfEpisodes = 77
		 EnvironmentSteps = 472001
		 AverageReturn = -2380.676513671875
		 AverageEpisodeLength = 7725.10009765625


472984 loss:0.5203937

INFO:absl: 
		 NumberOfEpisodes = 77
		 EnvironmentSteps = 473001
		 AverageReturn = -2380.676513671875
		 AverageEpisodeLength = 7725.10009765625


473969 loss:0.6876049

INFO:absl: 
		 NumberOfEpisodes = 77
		 EnvironmentSteps = 474001
		 AverageReturn = -2380.676513671875
		 AverageEpisodeLength = 7725.10009765625


474936 loss:0.61417

INFO:absl: 
		 NumberOfEpisodes = 78
		 EnvironmentSteps = 475001
		 AverageReturn = -2289.92578125
		 AverageEpisodeLength = 7725.10009765625


475953 loss:2.2070000

INFO:absl: 
		 NumberOfEpisodes = 78
		 EnvironmentSteps = 476001
		 AverageReturn = -2289.92578125
		 AverageEpisodeLength = 7725.10009765625


476950 loss:1.0560418

INFO:absl: 
		 NumberOfEpisodes = 78
		 EnvironmentSteps = 477001
		 AverageReturn = -2289.92578125
		 AverageEpisodeLength = 7725.10009765625


477945 loss:0.9809922

INFO:absl: 
		 NumberOfEpisodes = 78
		 EnvironmentSteps = 478001
		 AverageReturn = -2289.92578125
		 AverageEpisodeLength = 7725.10009765625


478985 loss:1.6621671

INFO:absl: 
		 NumberOfEpisodes = 78
		 EnvironmentSteps = 479001
		 AverageReturn = -2289.92578125
		 AverageEpisodeLength = 7725.10009765625


479941 loss:1.6849788

INFO:absl: 
		 NumberOfEpisodes = 78
		 EnvironmentSteps = 480001
		 AverageReturn = -2289.92578125
		 AverageEpisodeLength = 7725.10009765625


480985 loss:1.2923546

INFO:absl: 
		 NumberOfEpisodes = 79
		 EnvironmentSteps = 481001
		 AverageReturn = -2171.28955078125
		 AverageEpisodeLength = 7357.39990234375


481977 loss:0.57081

INFO:absl: 
		 NumberOfEpisodes = 79
		 EnvironmentSteps = 482001
		 AverageReturn = -2171.28955078125
		 AverageEpisodeLength = 7357.39990234375


482985 loss:0.6046639

INFO:absl: 
		 NumberOfEpisodes = 79
		 EnvironmentSteps = 483001
		 AverageReturn = -2171.28955078125
		 AverageEpisodeLength = 7357.39990234375


483991 loss:1.2924674

INFO:absl: 
		 NumberOfEpisodes = 79
		 EnvironmentSteps = 484001
		 AverageReturn = -2171.28955078125
		 AverageEpisodeLength = 7357.39990234375


484984 loss:0.5574454

INFO:absl: 
		 NumberOfEpisodes = 79
		 EnvironmentSteps = 485001
		 AverageReturn = -2171.28955078125
		 AverageEpisodeLength = 7357.39990234375


485965 loss:0.7478019

INFO:absl: 
		 NumberOfEpisodes = 79
		 EnvironmentSteps = 486001
		 AverageReturn = -2171.28955078125
		 AverageEpisodeLength = 7357.39990234375


486940 loss:1.2362460

INFO:absl: 
		 NumberOfEpisodes = 79
		 EnvironmentSteps = 487001
		 AverageReturn = -2171.28955078125
		 AverageEpisodeLength = 7357.39990234375


487975 loss:1.5596906

INFO:absl: 
		 NumberOfEpisodes = 79
		 EnvironmentSteps = 488001
		 AverageReturn = -2171.28955078125
		 AverageEpisodeLength = 7357.39990234375


488941 loss:1.0847957

INFO:absl: 
		 NumberOfEpisodes = 79
		 EnvironmentSteps = 489001
		 AverageReturn = -2171.28955078125
		 AverageEpisodeLength = 7357.39990234375


489946 loss:0.7851196

INFO:absl: 
		 NumberOfEpisodes = 79
		 EnvironmentSteps = 490001
		 AverageReturn = -2171.28955078125
		 AverageEpisodeLength = 7357.39990234375


490959 loss:0.9956031

INFO:absl: 
		 NumberOfEpisodes = 80
		 EnvironmentSteps = 491001
		 AverageReturn = -2233.992919921875
		 AverageEpisodeLength = 7534.2998046875


491997 loss:0.5660860

INFO:absl: 
		 NumberOfEpisodes = 80
		 EnvironmentSteps = 492001
		 AverageReturn = -2233.992919921875
		 AverageEpisodeLength = 7534.2998046875


492979 loss:0.5465596

INFO:absl: 
		 NumberOfEpisodes = 80
		 EnvironmentSteps = 493001
		 AverageReturn = -2233.992919921875
		 AverageEpisodeLength = 7534.2998046875


493962 loss:0.6829936

INFO:absl: 
		 NumberOfEpisodes = 80
		 EnvironmentSteps = 494001
		 AverageReturn = -2233.992919921875
		 AverageEpisodeLength = 7534.2998046875


494977 loss:0.6833721

INFO:absl: 
		 NumberOfEpisodes = 80
		 EnvironmentSteps = 495001
		 AverageReturn = -2233.992919921875
		 AverageEpisodeLength = 7534.2998046875


495950 loss:1.2081270

INFO:absl: 
		 NumberOfEpisodes = 80
		 EnvironmentSteps = 496001
		 AverageReturn = -2233.992919921875
		 AverageEpisodeLength = 7534.2998046875


496947 loss:0.8644898

INFO:absl: 
		 NumberOfEpisodes = 80
		 EnvironmentSteps = 497001
		 AverageReturn = -2233.992919921875
		 AverageEpisodeLength = 7534.2998046875


497964 loss:0.9287765

INFO:absl: 
		 NumberOfEpisodes = 80
		 EnvironmentSteps = 498001
		 AverageReturn = -2233.992919921875
		 AverageEpisodeLength = 7534.2998046875


498984 loss:0.8688259

INFO:absl: 
		 NumberOfEpisodes = 80
		 EnvironmentSteps = 499001
		 AverageReturn = -2233.992919921875
		 AverageEpisodeLength = 7534.2998046875


499999 loss:0.3590486

In [None]:
import matplotlib.pyplot as plt

x = range(0, num_iterations)
plt.plot(x, loss)


In [None]:
import imageio
import base64
import IPython
def embed_mp4(filename):
  """Embeds an mp4 file in the notebook."""
  video = open(filename,'rb').read()
  b64 = base64.b64encode(video)
  tag = '''
  <video width="640" height="480" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4">
  Your browser does not support the video tag.
  </video>'''.format(b64.decode())

  return IPython.display.HTML(tag)

In [None]:
def create_policy_eval_video(policy, filename, num_episodes=1, fps=30):
  filename = filename + ".mp4"
  with imageio.get_writer(filename, fps=fps) as video:
    for _ in range(num_episodes):
        time_step = eval_tf_env.reset()
        eval_py_env.render()
        print(time_step)
        video.append_data(eval_py_env.render())
        while not time_step.is_last():
            action_step = policy.action(time_step)
            time_step = eval_tf_env.step(action_step.action)
            video.append_data(eval_py_env.render())
            
  return embed_mp4(filename)




#create_policy_eval_video(agent.policy, "trained-agent")

In [None]:

policy = agent.policy
for _ in range(1):
    time_step = t_tf_env.reset() #eval_tf_env.reset()
#     print(time_step,'\n \n')
#     print(train_env.time_step_spec())
    t_env_f.render()
    
    while not time_step.is_last():
        action_step = policy.action(time_step)
        time_step = t_tf_env.step(action_step.action)
        t_env_f.render()

#     action_step = policy.action(time_step)
#     time_step = t_tf_env.step(action_step.action)
#     t_env_f.render()
